Datum dbms_assert_object_name(PG_FUNCTION_ARGS) { List *names; text *str; char *object_name; Oid classId; if (PG_ARGISNULL(0)) INVALID_OBJECT_NAME_EXCEPTION(); str = PG_GETARG_TEXT_P(0); if (EMPTY_STR(str)) INVALID_OBJECT_NAME_EXCEPTION(); object_name = text_to_cstring(str); names = stringToQualifiedNameList(object_name); #if PG_VERSION_NUM >= 90200 classId = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, true); #else classId = RangeVarGetRelid(makeRangeVarFromNameList(names), true); #endif if (!OidIsValid(classId)) INVALID_OBJECT_NAME_EXCEPTION(); PG_RETURN_TEXT_P(str); }
static Datum assign_callgraph_buffer_id() { List *names; Oid seqoid; names = stringToQualifiedNameList("call_graph.seqCallGraphBuffer"); #if PG_VERSION_NUM >= 90200 seqoid = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, false); #else seqoid = RangeVarGetRelid(makeRangeVarFromNameList(names), false); #endif return DirectFunctionCall1(nextval_oid, ObjectIdGetDatum(seqoid)); }
/*
 * DefineRule
 *		Execute a CREATE RULE command.
 */
ObjectAddress
DefineRule(RuleStmt *stmt, const char *queryString)
{
	Node	   *qual;
	List	   *ruleActions;
	Oid			relationId;

	/* Run parse analysis over the rule definition. */
	transformRuleStmt(stmt, queryString, &ruleActions, &qual);

	/*
	 * Find and lock the target relation.  The lock level must match what
	 * DefineQueryRewrite uses.
	 */
	relationId = RangeVarGetRelid(stmt->relation, AccessExclusiveLock, false);

	/* Hand off to the rewrite machinery. */
	return DefineQueryRewrite(stmt->rulename,
							  relationId,
							  qual,
							  stmt->event,
							  stmt->instead,
							  stmt->replace,
							  ruleActions);
}
/*
 * text_regclass: convert text to regclass
 */
Datum
text_regclass(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	Oid			result;
	RangeVar   *rv;

	/* Parse the (possibly schema-qualified) name into a RangeVar. */
	rv = makeRangeVarFromNameList(textToQualifiedNameList(relname));

	/*
	 * NOTE(review): this RangeVarGetRelid variant appears to take
	 * (rangevar, failOK, allowHcatalog) rather than a lock mode --
	 * presumably the HAWQ/GPDB signature; confirm against this tree's
	 * namespace.c.  With failOK = false a missing relation raises an
	 * error rather than returning InvalidOid.
	 */
	result = RangeVarGetRelid(rv, false, true /*allowHcatalog*/);

	PG_RETURN_OID(result);
}
/*
 * gp_aovisimap_entry_name
 *
 * Resolve the given (possibly schema-qualified) relation name to an OID
 * and delegate to gp_aovisimap_entry_internal for that relation.
 */
Datum
gp_aovisimap_entry_name(PG_FUNCTION_ARGS)
{
	text	   *relname_text = PG_GETARG_TEXT_P(0);
	RangeVar   *rangevar;
	Oid			reloid;

	rangevar = makeRangeVarFromNameList(textToQualifiedNameList(relname_text));

	/* failOK = false: error out if the relation does not exist */
	reloid = RangeVarGetRelid(rangevar, false);

	return gp_aovisimap_entry_internal(fcinfo, reloid);
}
/* * text_regclass: convert text to regclass * * This could be replaced by CoerceViaIO, except that we need to treat * text-to-regclass as an implicit cast to support legacy forms of nextval() * and related functions. */ Datum text_regclass(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); Oid result; RangeVar *rv; rv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); /* We might not even have permissions on this relation; don't lock it. */ result = RangeVarGetRelid(rv, NoLock, false); PG_RETURN_OID(result); }
/*
 * ResolveRelationId finds the relation OID for a potentially
 * schema-qualified relation name.  Errors out if the relation
 * cannot be found.
 */
Oid
ResolveRelationId(text *relationName)
{
	List	   *nameList;
	RangeVar   *rangeVar;

	/* split "schema.relation" into its components */
	nameList = textToQualifiedNameList(relationName);
	rangeVar = makeRangeVarFromNameList(nameList);

	/* missing_ok = false: raise an error if the relation does not exist */
	return RangeVarGetRelid(rangeVar, NoLock, false);
}
Datum row_security_active_name(PG_FUNCTION_ARGS) { /* By qualified name */ text *tablename = PG_GETARG_TEXT_P(0); RangeVar *tablerel; Oid tableoid; int rls_status; /* Look up table name. Can't lock it - we might not have privileges. */ tablerel = makeRangeVarFromNameList(textToQualifiedNameList(tablename)); tableoid = RangeVarGetRelid(tablerel, NoLock, false); rls_status = check_enable_rls(tableoid, InvalidOid, true); PG_RETURN_BOOL(rls_status == RLS_ENABLED); }
/*
 * BufferedWriterParam
 *
 * Apply one keyword/value configuration pair to the writer.  Returns true
 * if the keyword was recognized and consumed, false for an unknown keyword
 * (so the caller can try other parameter handlers).
 */
static bool
BufferedWriterParam(BufferedWriter *self, const char *keyword, char *value)
{
	if (CompareKeyword(keyword, "TABLE") ||
		CompareKeyword(keyword, "OUTPUT"))
	{
		/* target relation; ASSERT_ONCE guards against setting it twice */
		ASSERT_ONCE(self->base.output == NULL);

		self->base.relid =
			RangeVarGetRelid(makeRangeVarFromNameList(
								stringToQualifiedNameList(value)),
							 NoLock, false);
		self->base.output = get_relation_name(self->base.relid);
	}
	else if (CompareKeyword(keyword, "DUPLICATE_BADFILE"))
	{
		ASSERT_ONCE(self->base.dup_badfile == NULL);
		self->base.dup_badfile = pstrdup(value);
	}
	else if (CompareKeyword(keyword, "DUPLICATE_ERRORS"))
	{
		/* values < -1 mean "not yet set"; see ASSERT_ONCE guard */
		ASSERT_ONCE(self->base.max_dup_errors < -1);
		self->base.max_dup_errors = ParseInt64(value, -1);
		/* user-supplied -1 means "unlimited duplicate errors" */
		if (self->base.max_dup_errors == -1)
			self->base.max_dup_errors = INT64_MAX;
	}
	else if (CompareKeyword(keyword, "ON_DUPLICATE_KEEP"))
	{
		/* order must correspond to ON_DUPLICATE_NAMES */
		const ON_DUPLICATE values[] =
		{
			ON_DUPLICATE_KEEP_NEW,
			ON_DUPLICATE_KEEP_OLD
		};

		self->base.on_duplicate = values[choice(keyword, value,
									ON_DUPLICATE_NAMES, lengthof(values))];
	}
	else if (CompareKeyword(keyword, "TRUNCATE"))
	{
		self->base.truncate = ParseBoolean(value);
	}
	else
		return false;			/* unknown parameter */

	return true;
}
/* * Note: nextval with a text argument is no longer exported as a pg_proc * entry, but we keep it around to ease porting of C code that may have * called the function directly. */ Datum nextval(PG_FUNCTION_ARGS) { text *seqin = PG_GETARG_TEXT_P(0); RangeVar *sequence; Oid relid; sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin)); /* * XXX: This is not safe in the presence of concurrent DDL, but acquiring * a lock here is more expensive than letting nextval_internal do it, * since the latter maintains a cache that keeps us from hitting the lock * manager more than once per transaction. It's not clear whether the * performance penalty is material in practice, but for now, we do it this * way. */ relid = RangeVarGetRelid(sequence, NoLock, false); PG_RETURN_INT64(nextval_internal(relid)); }
/* * to_regclass - converts "classname" to class OID * * If the name is not found, we return NULL. */ Datum to_regclass(PG_FUNCTION_ARGS) { char *class_name = PG_GETARG_CSTRING(0); Oid result; List *names; /* * Parse the name into components and see if it matches any pg_class * entries in the current search path. */ names = stringToQualifiedNameList(class_name); /* We might not even have permissions on this relation; don't lock it. */ result = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, true); if (OidIsValid(result)) PG_RETURN_OID(result); else PG_RETURN_NULL(); }
/*
 * DefineRule
 *		Execute a CREATE RULE command.
 */
void
DefineRule(RuleStmt *stmt, const char *queryString)
{
	Node	   *qual;
	List	   *ruleActions;
	Oid			relationId;

	/* Run parse analysis over the rule definition. */
	transformRuleStmt(stmt, queryString, &ruleActions, &qual);

	/* Look up the relation the rule is attached to. */
	relationId = RangeVarGetRelid(stmt->relation, false);

	/* Hand off to the rewrite machinery. */
	DefineQueryRewrite(stmt->rulename,
					   relationId,
					   qual,
					   stmt->event,
					   stmt->instead,
					   stmt->replace,
					   ruleActions);
}
/*
 * ExecRenameStmt
 *		Executes an ALTER OBJECT / RENAME TO statement.  Based on the object
 *		type, the function appropriate to that type is executed.
 */
void
ExecRenameStmt(RenameStmt *stmt)
{
	switch (stmt->renameType)
	{
		case OBJECT_AGGREGATE:
			RenameAggregate(stmt->object, stmt->objarg, stmt->newname);
			break;

		case OBJECT_CONVERSION:
			RenameConversion(stmt->object, stmt->newname);
			break;

		case OBJECT_DATABASE:
			RenameDatabase(stmt->subname, stmt->newname);
			break;

		case OBJECT_FUNCTION:
			RenameFunction(stmt->object, stmt->objarg, stmt->newname);
			break;

		case OBJECT_LANGUAGE:
			RenameLanguage(stmt->subname, stmt->newname);
			break;

		case OBJECT_OPCLASS:
			RenameOpClass(stmt->object, stmt->subname, stmt->newname);
			break;

		case OBJECT_ROLE:
			RenameRole(stmt->subname, stmt->newname);
			break;

		case OBJECT_SCHEMA:
			RenameSchema(stmt->subname, stmt->newname);
			break;

		case OBJECT_TABLESPACE:
			RenameTableSpace(stmt->subname, stmt->newname);
			break;

			/* relation-attached objects share the ownership check below */
		case OBJECT_TABLE:
		case OBJECT_INDEX:
		case OBJECT_COLUMN:
		case OBJECT_TRIGGER:
			{
				Oid			relid;

				/* caller must own the relation; errors out otherwise */
				CheckRelationOwnership(stmt->relation, true);

				/* NOTE(review): two-argument form; failOK = false here */
				relid = RangeVarGetRelid(stmt->relation, false);

				switch (stmt->renameType)
				{
					case OBJECT_TABLE:
					case OBJECT_INDEX:
						{
							/*
							 * RENAME TABLE requires that we (still) hold
							 * CREATE rights on the containing namespace, as
							 * well as ownership of the table.
							 */
							Oid			namespaceId = get_rel_namespace(relid);
							AclResult	aclresult;

							aclresult = pg_namespace_aclcheck(namespaceId,
															  GetUserId(),
															  ACL_CREATE);
							if (aclresult != ACLCHECK_OK)
								aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
											   get_namespace_name(namespaceId));

							renamerel(relid, stmt->newname);
							break;
						}
					case OBJECT_COLUMN:
						renameatt(relid,
								  stmt->subname,	/* old att name */
								  stmt->newname,	/* new att name */
								  interpretInhOption(stmt->relation->inhOpt),	/* recursive? */
								  false);	/* recursing already? */
						break;
					case OBJECT_TRIGGER:
						renametrig(relid,
								   stmt->subname,	/* old trigger name */
								   stmt->newname);	/* new trigger name */
						break;
					default:
						/* can't happen */ ;
				}
				break;
			}

		default:
			elog(ERROR, "unrecognized rename stmt type: %d",
				 (int) stmt->renameType);
	}
}
/*
 * regclassin		- converts "classname" to class OID
 *
 * We also accept a numeric OID, for symmetry with the output routine.
 *
 * '-' signifies unknown (OID 0).  In all other cases, the input must
 * match an existing pg_class entry.
 */
Datum
regclassin(PG_FUNCTION_ARGS)
{
	char	   *class_name_or_oid = PG_GETARG_CSTRING(0);
	Oid			result = InvalidOid;
	List	   *names;

	/* '-' ? */
	if (strcmp(class_name_or_oid, "-") == 0)
		PG_RETURN_OID(InvalidOid);

	/* Numeric OID?  (accepted only when the whole string is digits) */
	if (class_name_or_oid[0] >= '0' &&
		class_name_or_oid[0] <= '9' &&
		strspn(class_name_or_oid, "0123456789") == strlen(class_name_or_oid))
	{
		result = DatumGetObjectId(DirectFunctionCall1(oidin,
									CStringGetDatum(class_name_or_oid)));
		PG_RETURN_OID(result);
	}

	/* Else it's a name, possibly schema-qualified */

	/*
	 * In bootstrap mode we assume the given name is not schema-qualified, and
	 * just search pg_class for a match.  This is needed for initializing
	 * other system catalogs (pg_namespace may not exist yet, and certainly
	 * there are no schemas other than pg_catalog).
	 */
	if (IsBootstrapProcessingMode())
	{
		int			matches = 0;

		/* caql catalog query: scan pg_class by relname */
		result = caql_getoid_plus(
				NULL,
				&matches,
				NULL,
				cql("SELECT oid FROM pg_class "
					" WHERE relname = :1 ",
					CStringGetDatum(class_name_or_oid)));

		if (0 == matches)
		{
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_TABLE),
					 errmsg("relation \"%s\" does not exist",
							class_name_or_oid)));
		}

		/* We assume there can be only one match */
		PG_RETURN_OID(result);
	}

	/*
	 * Normal case: parse the name into components and see if it matches any
	 * pg_class entries in the current search path.
	 *
	 * NOTE(review): two-argument stringToQualifiedNameList and the
	 * (rangevar, failOK, allowHcatalog) RangeVarGetRelid look like the
	 * GPDB/HAWQ signatures -- confirm against this tree's headers.
	 */
	names = stringToQualifiedNameList(class_name_or_oid, "regclassin");

	result = RangeVarGetRelid(makeRangeVarFromNameList(names),
							  false, true /*allowHcatalog*/);

	PG_RETURN_OID(result);
}
/*
 * Open the local relation associated with the remote one.
 *
 * Optionally rebuilds the Relcache mapping if it was invalidated
 * by local DDL.
 */
LogicalRepRelMapEntry *
logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
{
	LogicalRepRelMapEntry *entry;
	bool		found;

	if (LogicalRepRelMap == NULL)
		logicalrep_relmap_init();

	/* Search for existing entry. */
	entry = hash_search(LogicalRepRelMap, (void *) &remoteid,
						HASH_FIND, &found);

	if (!found)
		elog(ERROR, "no relation map entry for remote relation ID %u",
			 remoteid);

	/* Need to update the local cache? */
	if (!OidIsValid(entry->localreloid))
	{
		Oid			relid;
		int			i;
		int			found;		/* NOTE(review): shadows outer "bool found" */
		Bitmapset  *idkey;
		TupleDesc	desc;
		LogicalRepRelation *remoterel;
		MemoryContext oldctx;

		remoterel = &entry->remoterel;

		/* Try to find and lock the relation by name. */
		relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
											  remoterel->relname, -1),
								 lockmode, true);
		if (!OidIsValid(relid))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" does not exist",
							remoterel->nspname, remoterel->relname)));
		entry->localrel = heap_open(relid, NoLock);

		/*
		 * We currently only support writing to regular tables
		 * (RELKIND_RELATION is the only relkind accepted here).
		 */
		if (entry->localrel->rd_rel->relkind != RELKIND_RELATION)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("logical replication target relation \"%s.%s\" is not a table",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Build the mapping of local attribute numbers to remote attribute
		 * numbers and validate that we don't miss any replicated columns
		 * as that would result in potentially unwanted data loss.
		 */
		desc = RelationGetDescr(entry->localrel);
		oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
		entry->attrmap = palloc(desc->natts * sizeof(int));
		MemoryContextSwitchTo(oldctx);

		/* count how many remote columns we managed to match locally */
		found = 0;
		for (i = 0; i < desc->natts; i++)
		{
			int			attnum = logicalrep_rel_att_by_name(remoterel,
											NameStr(desc->attrs[i]->attname));

			entry->attrmap[i] = attnum;
			if (attnum >= 0)
				found++;
		}

		/* TODO, detail message with names of missing columns */
		if (found < remoterel->natts)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" is missing "
							"some replicated columns",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Check that replica identity matches. We allow for stricter replica
		 * identity (fewer columns) on subscriber as that will not stop us
		 * from finding unique tuple. IE, if publisher has identity
		 * (id,timestamp) and subscriber just (id) this will not be a problem,
		 * but in the opposite scenario it will.
		 *
		 * Don't throw any error here just mark the relation entry as not
		 * updatable, as replica identity is only for updates and deletes
		 * but inserts can be replicated even without it.
		 */
		entry->updatable = true;
		idkey = RelationGetIndexAttrBitmap(entry->localrel,
										   INDEX_ATTR_BITMAP_IDENTITY_KEY);
		/* fallback to PK if no replica identity */
		if (idkey == NULL)
		{
			idkey = RelationGetIndexAttrBitmap(entry->localrel,
											   INDEX_ATTR_BITMAP_PRIMARY_KEY);

			/*
			 * If no replica identity index and no PK, the published table
			 * must have replica identity FULL.
			 */
			if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
				entry->updatable = false;
		}

		/* walk the identity-key columns and verify each is replicated */
		i = -1;
		while ((i = bms_next_member(idkey, i)) >= 0)
		{
			int			attnum = i + FirstLowInvalidHeapAttributeNumber;

			if (!AttrNumberIsForUserDefinedAttr(attnum))
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("logical replication target relation \"%s.%s\" uses "
								"system columns in REPLICA IDENTITY index",
								remoterel->nspname, remoterel->relname)));

			attnum = AttrNumberGetAttrOffset(attnum);

			if (!bms_is_member(entry->attrmap[attnum], remoterel->attkeys))
			{
				entry->updatable = false;
				break;
			}
		}

		entry->localreloid = relid;
	}
	else
		entry->localrel = heap_open(entry->localreloid, lockmode);

	return entry;
}
/*
 * spgstat
 *
 * Scan every page of the named SP-GiST index and return a human-readable
 * text summary: page counts, tuple counts and space usage.
 *
 * NOTE(review): the index is opened and scanned under AccessExclusiveLock,
 * so concurrent access to the index is blocked for the duration.
 */
Datum
spgstat(PG_FUNCTION_ARGS)
{
	text	   *name = PG_GETARG_TEXT_P(0);
	char	   *relname = text_to_cstring(name);
	RangeVar   *relvar;
	Relation	index;
	List	   *relname_list;
	Oid			relOid;
	BlockNumber blkno = SPGIST_HEAD_BLKNO;
	BlockNumber totalPages = 0,
				innerPages = 0,
				emptyPages = 0;
	double		usedSpace = 0.0;
	char		res[1024];
	int			bufferSize = -1;	/* usable bytes per page; computed lazily */
	int64		innerTuples = 0,
				leafTuples = 0;

	relname_list = stringToQualifiedNameList(relname);
	relvar = makeRangeVarFromNameList(relname_list);
	relOid = RangeVarGetRelid(relvar, false);

	index = index_open(relOid, AccessExclusiveLock);

	/* rd_am is only set for index relations */
	if (index->rd_am == NULL)
		elog(ERROR, "Relation %s.%s is not an index",
			 get_namespace_name(RelationGetNamespace(index)),
			 RelationGetRelationName(index));

	totalPages = RelationGetNumberOfBlocks(index);

	/* visit every page after the head block */
	for (blkno = SPGIST_HEAD_BLKNO; blkno < totalPages; blkno++)
	{
		Buffer		buffer;
		Page		page;

		buffer = ReadBuffer(index, blkno);
		LockBuffer(buffer, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buffer);

		if (SpGistPageIsLeaf(page))
		{
			leafTuples += SpGistPageGetMaxOffset(page);
		}
		else
		{
			innerPages++;
			innerTuples += SpGistPageGetMaxOffset(page);
		}

		/* compute usable page capacity once, from the first page seen */
		if (bufferSize < 0)
			bufferSize = BufferGetPageSize(buffer) -
				MAXALIGN(sizeof(SpGistPageOpaqueData)) -
				SizeOfPageHeaderData;

		usedSpace += bufferSize - (PageGetFreeSpace(page) + sizeof(ItemIdData));

		/* a page whose free space equals the capacity holds no tuples */
		if (PageGetFreeSpace(page) + sizeof(ItemIdData) == bufferSize)
			emptyPages++;

		UnlockReleaseBuffer(buffer);
	}

	index_close(index, AccessExclusiveLock);

	totalPages--;				/* metapage */

	/*
	 * NOTE(review): %lld assumes int64 is long long on this platform --
	 * confirm, or consider INT64_FORMAT.
	 */
	snprintf(res, sizeof(res),
			 "totalPages: %u\n"
			 "innerPages: %u\n"
			 "leafPages: %u\n"
			 "emptyPages: %u\n"
			 "usedSpace: %.2f kbytes\n"
			 "freeSpace: %.2f kbytes\n"
			 "fillRatio: %.2f%c\n"
			 "leafTuples: %lld\n"
			 "innerTuples: %lld",
			 totalPages, innerPages, totalPages - innerPages, emptyPages,
			 usedSpace / 1024.0,
			 (((double) bufferSize) * ((double) totalPages) - usedSpace) / 1024,
			 100.0 * (usedSpace / (((double) bufferSize) * ((double) totalPages))),
			 '%',
			 leafTuples, innerTuples);

	PG_RETURN_TEXT_P(CStringGetTextDatum(res));
}
/*
 * CitusCopyFrom implements the COPY table_name FROM ... for hash-partitioned
 * and range-partitioned tables.
 *
 * Rows are read locally via the regular COPY machinery, routed to the shard
 * matching their partition column value, and streamed to every placement of
 * that shard in binary COPY format.  On error, all remote COPYs and
 * transactions are rolled back; on success they are committed (or prepared,
 * under the 2PC transaction manager).
 */
void
CitusCopyFrom(CopyStmt *copyStatement, char *completionTag)
{
	Oid			tableId = RangeVarGetRelid(copyStatement->relation, NoLock, false);
	char	   *relationName = get_rel_name(tableId);
	Relation	distributedRelation = NULL;
	char		partitionMethod = '\0';
	Var		   *partitionColumn = NULL;
	TupleDesc	tupleDescriptor = NULL;
	uint32		columnCount = 0;
	Datum	   *columnValues = NULL;
	bool	   *columnNulls = NULL;
	TypeCacheEntry *typeEntry = NULL;
	FmgrInfo   *hashFunction = NULL;
	FmgrInfo   *compareFunction = NULL;
	int			shardCount = 0;
	List	   *shardIntervalList = NULL;
	ShardInterval **shardIntervalCache = NULL;
	bool		useBinarySearch = false;
	HTAB	   *shardConnectionHash = NULL;
	ShardConnections *shardConnections = NULL;
	List	   *connectionList = NIL;
	EState	   *executorState = NULL;
	MemoryContext executorTupleContext = NULL;
	ExprContext *executorExpressionContext = NULL;
	CopyState	copyState = NULL;
	CopyOutState copyOutState = NULL;
	FmgrInfo   *columnOutputFunctions = NULL;
	uint64		processedRowCount = 0;

	/* disallow COPY to/from file or program except for superusers */
	if (copyStatement->filename != NULL && !superuser())
	{
		if (copyStatement->is_program)
		{
			ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
							errmsg("must be superuser to COPY to or from an external program"),
							errhint("Anyone can COPY to stdout or from stdin. "
									"psql's \\copy command also works for anyone.")));
		}
		else
		{
			ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
							errmsg("must be superuser to COPY to or from a file"),
							errhint("Anyone can COPY to stdout or from stdin. "
									"psql's \\copy command also works for anyone.")));
		}
	}

	partitionColumn = PartitionColumn(tableId, 0);
	partitionMethod = PartitionMethod(tableId);
	if (partitionMethod != DISTRIBUTE_BY_RANGE &&
		partitionMethod != DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("COPY is only supported for hash- and "
							   "range-partitioned tables")));
	}

	/* resolve hash function for partition column */
	typeEntry = lookup_type_cache(partitionColumn->vartype,
								  TYPECACHE_HASH_PROC_FINFO);
	hashFunction = &(typeEntry->hash_proc_finfo);

	/* resolve compare function for shard intervals */
	compareFunction = ShardIntervalCompareFunction(partitionColumn,
												   partitionMethod);

	/* allocate column values and nulls arrays */
	distributedRelation = heap_open(tableId, RowExclusiveLock);
	tupleDescriptor = RelationGetDescr(distributedRelation);
	columnCount = tupleDescriptor->natts;
	columnValues = palloc0(columnCount * sizeof(Datum));
	columnNulls = palloc0(columnCount * sizeof(bool));

	/* load the list of shards and verify that we have shards to copy into */
	shardIntervalList = LoadShardIntervalList(tableId);
	if (shardIntervalList == NIL)
	{
		if (partitionMethod == DISTRIBUTE_BY_HASH)
		{
			ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
							errmsg("could not find any shards into which to copy"),
							errdetail("No shards exist for distributed table \"%s\".",
									  relationName),
							errhint("Run master_create_worker_shards to create shards "
									"and try again.")));
		}
		else
		{
			ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
							errmsg("could not find any shards into which to copy"),
							errdetail("No shards exist for distributed table \"%s\".",
									  relationName)));
		}
	}

	/* prevent concurrent placement changes and non-commutative DML statements */
	LockAllShards(shardIntervalList);

	/* initialize the shard interval cache */
	shardCount = list_length(shardIntervalList);
	shardIntervalCache = SortedShardIntervalArray(shardIntervalList);

	/* determine whether to use binary search */
	if (partitionMethod != DISTRIBUTE_BY_HASH ||
		!IsUniformHashDistribution(shardIntervalCache, shardCount))
	{
		useBinarySearch = true;
	}

	/* initialize copy state to read from COPY data source */
	copyState = BeginCopyFrom(distributedRelation,
							  copyStatement->filename,
							  copyStatement->is_program,
							  copyStatement->attlist,
							  copyStatement->options);

	executorState = CreateExecutorState();
	executorTupleContext = GetPerTupleMemoryContext(executorState);
	executorExpressionContext = GetPerTupleExprContext(executorState);

	copyOutState = (CopyOutState) palloc0(sizeof(CopyOutStateData));
	copyOutState->binary = true;
	copyOutState->fe_msgbuf = makeStringInfo();
	copyOutState->rowcontext = executorTupleContext;

	columnOutputFunctions = ColumnOutputFunctions(tupleDescriptor,
												  copyOutState->binary);

	/*
	 * Create a mapping of shard id to a connection for each of its placements.
	 * The hash should be initialized before the PG_TRY, since it is used in
	 * PG_CATCH. Otherwise, it may be undefined in the PG_CATCH (see sigsetjmp
	 * documentation).
	 */
	shardConnectionHash = CreateShardConnectionHash();

	/* we use a PG_TRY block to roll back on errors (e.g. in NextCopyFrom) */
	PG_TRY();
	{
		ErrorContextCallback errorCallback;

		/* set up callback to identify error line number */
		errorCallback.callback = CopyFromErrorCallback;
		errorCallback.arg = (void *) copyState;
		errorCallback.previous = error_context_stack;
		error_context_stack = &errorCallback;

		/* ensure transactions have unique names on worker nodes */
		InitializeDistributedTransaction();

		while (true)
		{
			bool		nextRowFound = false;
			Datum		partitionColumnValue = 0;
			ShardInterval *shardInterval = NULL;
			int64		shardId = 0;
			bool		shardConnectionsFound = false;
			MemoryContext oldContext = NULL;

			ResetPerTupleExprContext(executorState);

			oldContext = MemoryContextSwitchTo(executorTupleContext);

			/* parse a row from the input */
			nextRowFound = NextCopyFrom(copyState, executorExpressionContext,
										columnValues, columnNulls, NULL);
			if (!nextRowFound)
			{
				MemoryContextSwitchTo(oldContext);
				break;
			}

			CHECK_FOR_INTERRUPTS();

			/* find the partition column value */
			if (columnNulls[partitionColumn->varattno - 1])
			{
				ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
								errmsg("cannot copy row with NULL value "
									   "in partition column")));
			}

			partitionColumnValue = columnValues[partitionColumn->varattno - 1];

			/* find the shard interval and id for the partition column value */
			shardInterval = FindShardInterval(partitionColumnValue,
											  shardIntervalCache,
											  shardCount, partitionMethod,
											  compareFunction, hashFunction,
											  useBinarySearch);
			if (shardInterval == NULL)
			{
				ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
								errmsg("could not find shard for partition column "
									   "value")));
			}

			shardId = shardInterval->shardId;

			MemoryContextSwitchTo(oldContext);

			/* get existing connections to the shard placements, if any */
			shardConnections = GetShardConnections(shardConnectionHash,
												   shardId,
												   &shardConnectionsFound);
			if (!shardConnectionsFound)
			{
				/* open connections and initiate COPY on shard placements */
				OpenCopyTransactions(copyStatement, shardConnections);

				/* send binary headers to shard placements */
				resetStringInfo(copyOutState->fe_msgbuf);
				AppendCopyBinaryHeaders(copyOutState);
				SendCopyDataToAll(copyOutState->fe_msgbuf,
								  shardConnections->connectionList);
			}

			/* replicate row to shard placements */
			resetStringInfo(copyOutState->fe_msgbuf);
			AppendCopyRowData(columnValues, columnNulls, tupleDescriptor,
							  copyOutState, columnOutputFunctions);
			SendCopyDataToAll(copyOutState->fe_msgbuf,
							  shardConnections->connectionList);

			processedRowCount += 1;
		}

		connectionList = ConnectionList(shardConnectionHash);

		/* send binary footers to all shard placements */
		resetStringInfo(copyOutState->fe_msgbuf);
		AppendCopyBinaryFooters(copyOutState);
		SendCopyDataToAll(copyOutState->fe_msgbuf, connectionList);

		/* all lines have been copied, stop showing line number in errors */
		error_context_stack = errorCallback.previous;

		/* close the COPY input on all shard placements */
		EndRemoteCopy(connectionList, true);

		if (CopyTransactionManager == TRANSACTION_MANAGER_2PC)
		{
			PrepareRemoteTransactions(connectionList);
		}

		EndCopyFrom(copyState);
		heap_close(distributedRelation, NoLock);

		/* check for cancellation one last time before committing */
		CHECK_FOR_INTERRUPTS();
	}
	PG_CATCH();
	{
		List	   *abortConnectionList = NIL;

		/* roll back all transactions */
		abortConnectionList = ConnectionList(shardConnectionHash);
		EndRemoteCopy(abortConnectionList, false);
		AbortRemoteTransactions(abortConnectionList);
		CloseConnections(abortConnectionList);

		PG_RE_THROW();
	}
	PG_END_TRY();

	/*
	 * Ready to commit the transaction, this code is below the PG_TRY block because
	 * we do not want any of the transactions rolled back if a failure occurs. Instead,
	 * they should be rolled forward.
	 */
	CommitRemoteTransactions(connectionList);
	CloseConnections(connectionList);

	if (completionTag != NULL)
	{
		snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
				 "COPY " UINT64_FORMAT, processedRowCount);
	}
}
/*
 * master_apply_delete_command takes in a delete command, finds shards that
 * match the criteria defined in the delete command, drops the found shards from
 * the worker nodes, and updates the corresponding metadata on the master node.
 * This function drops a shard if and only if all rows in the shard satisfy
 * the conditions in the delete command. Note that this function only accepts
 * conditions on the partition key and if no condition is provided then all
 * shards are deleted.
 *
 * We mark shard placements that we couldn't drop as to be deleted later. If a
 * shard satisfies the given conditions, we delete it from shard metadata table
 * even though related shard placements are not deleted.
 */
Datum
master_apply_delete_command(PG_FUNCTION_ARGS)
{
	text	   *queryText = PG_GETARG_TEXT_P(0);
	char	   *queryString = text_to_cstring(queryText);
	char	   *relationName = NULL;
	char	   *schemaName = NULL;
	Oid			relationId = InvalidOid;
	List	   *shardIntervalList = NIL;
	List	   *deletableShardIntervalList = NIL;
	List	   *queryTreeList = NIL;
	Query	   *deleteQuery = NULL;
	Node	   *whereClause = NULL;
	Node	   *deleteCriteria = NULL;
	Node	   *queryTreeNode = NULL;
	DeleteStmt *deleteStatement = NULL;
	int			droppedShardCount = 0;
	LOCKMODE	lockMode = 0;
	char		partitionMethod = 0;
	bool		failOK = false;

	/* PG 10 changed the raw parse tree representation (RawStmt wrapper) */
#if (PG_VERSION_NUM >= 100000)
	RawStmt    *rawStmt = (RawStmt *) ParseTreeRawStmt(queryString);
	queryTreeNode = rawStmt->stmt;
#else
	queryTreeNode = ParseTreeNode(queryString);
#endif

	EnsureCoordinator();
	CheckCitusVersion(ERROR);

	if (!IsA(queryTreeNode, DeleteStmt))
	{
		ereport(ERROR, (errmsg("query \"%s\" is not a delete statement",
							   queryString)));
	}

	deleteStatement = (DeleteStmt *) queryTreeNode;

	schemaName = deleteStatement->relation->schemaname;
	relationName = deleteStatement->relation->relname;

	/*
	 * We take an exclusive lock while dropping shards to prevent concurrent
	 * writes. We don't want to block SELECTs, which means queries might fail
	 * if they access a shard that has just been dropped.
	 */
	lockMode = ExclusiveLock;

	/* failOK = false: missing relation raises an error here */
	relationId = RangeVarGetRelid(deleteStatement->relation, lockMode, failOK);

	/* schema-prefix if it is not specified already */
	if (schemaName == NULL)
	{
		Oid			schemaId = get_rel_namespace(relationId);

		schemaName = get_namespace_name(schemaId);
	}

	CheckDistributedTable(relationId);
	EnsureTablePermissions(relationId, ACL_DELETE);

#if (PG_VERSION_NUM >= 100000)
	queryTreeList = pg_analyze_and_rewrite(rawStmt, queryString, NULL, 0, NULL);
#else
	queryTreeList = pg_analyze_and_rewrite(queryTreeNode, queryString, NULL, 0);
#endif
	deleteQuery = (Query *) linitial(queryTreeList);
	CheckTableCount(deleteQuery);

	/* get where clause and flatten it */
	whereClause = (Node *) deleteQuery->jointree->quals;
	deleteCriteria = eval_const_expressions(NULL, whereClause);

	partitionMethod = PartitionMethod(relationId);
	if (partitionMethod == DISTRIBUTE_BY_HASH)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot delete from hash distributed table with this "
							   "command"),
						errdetail("Delete statements on hash-partitioned tables "
								  "are not supported with master_apply_delete_command."),
						errhint("Use master_modify_multiple_shards command instead.")));
	}
	else if (partitionMethod == DISTRIBUTE_BY_NONE)
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot delete from distributed table"),
						errdetail("Delete statements on reference tables "
								  "are not supported.")));
	}

	CheckDeleteCriteria(deleteCriteria);
	CheckPartitionColumn(relationId, deleteCriteria);

	shardIntervalList = LoadShardIntervalList(relationId);

	/* drop all shards if where clause is not present */
	if (deleteCriteria == NULL)
	{
		deletableShardIntervalList = shardIntervalList;
		ereport(DEBUG2, (errmsg("dropping all shards for \"%s\"",
								relationName)));
	}
	else
	{
		deletableShardIntervalList = ShardsMatchingDeleteCriteria(relationId,
																  shardIntervalList,
																  deleteCriteria);
	}

	droppedShardCount = DropShards(relationId, schemaName, relationName,
								   deletableShardIntervalList);

	PG_RETURN_INT32(droppedShardCount);
}
/*
 * Delete error log of the specified relation. This returns true from master
 * iff all segments and master find the relation.
 *
 * The argument accepts two wildcards: "*.*" (all databases, superuser only)
 * and "*" (current database, database owner only); otherwise it is treated
 * as a (possibly qualified) relation name, which only the table owner may
 * truncate the error log for.
 */
Datum
gp_truncate_error_log(PG_FUNCTION_ARGS)
{
	text	   *relname;
	char	   *relname_str;
	RangeVar   *relrv;
	Oid			relid;
	bool		allResults = true;

	relname = PG_GETARG_TEXT_P(0);
	relname_str = text_to_cstring(relname);

	if (strcmp(relname_str, "*.*") == 0)
	{
		/*
		 * Only superuser is allowed to delete log files across database.
		 */
		if (!superuser())
			ereport(ERROR,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 (errmsg("must be superuser to delete all error log files"))));

		ErrorLogDelete(InvalidOid, InvalidOid);
	}
	else if (strcmp(relname_str, "*") == 0)
	{
		/*
		 * Database owner can delete error log files.
		 */
		if (!pg_database_ownercheck(MyDatabaseId, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
						   get_database_name(MyDatabaseId));

		ErrorLogDelete(MyDatabaseId, InvalidOid);
	}
	else
	{
		AclResult	aclresult;

		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
		/* failOK = true: we handle the missing-relation case ourselves */
		relid = RangeVarGetRelid(relrv, true);

		/* Return false if the relation does not exist. */
		if (!OidIsValid(relid))
			PG_RETURN_BOOL(false);

		/*
		 * Allow only the table owner to truncate error log.
		 */
		aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_TRUNCATE);

		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_CLASS, relrv->relname);

		/* We don't care if this fails or not. */
		ErrorLogDelete(MyDatabaseId, relid);
	}

	/*
	 * Dispatch the work to segments.
	 */
	if (Gp_role == GP_ROLE_DISPATCH)
	{
		int			i = 0;
		StringInfoData sql;
		CdbPgResults cdb_pgresults = {NULL, 0};

		initStringInfo(&sql);

		appendStringInfo(&sql,
						 "SELECT pg_catalog.gp_truncate_error_log(%s)",
						 quote_literal_internal(text_to_cstring(relname)));

		CdbDispatchCommand(sql.data, DF_WITH_SNAPSHOT, &cdb_pgresults);

		/* AND together the boolean results from every segment */
		for (i = 0; i < cdb_pgresults.numResults; i++)
		{
			Datum		value;
			bool		isnull;
			struct pg_result *pgresult = cdb_pgresults.pg_results[i];

			if (PQresultStatus(pgresult) != PGRES_TUPLES_OK)
			{
				cdbdisp_clearCdbPgResults(&cdb_pgresults);
				ereport(ERROR,
						(errmsg("unexpected result from segment: %d",
								PQresultStatus(pgresult))));
			}
			value = ResultToDatum(pgresult, 0, 0, boolin, &isnull);
			allResults &= (!isnull && DatumGetBool(value));
		}

		cdbdisp_clearCdbPgResults(&cdb_pgresults);
		pfree(sql.data);
	}

	/* Return true iff all segments return true. */
	PG_RETURN_BOOL(allResults);
}
/*
 * FetchTableCommon executes common logic that wraps around the actual data
 * fetching function. This common logic includes ensuring that only one process
 * tries to fetch this table at any given time, and that data fetch operations
 * are retried in case of node failures.
 */
static void
FetchTableCommon(text *tableNameText, uint64 remoteTableSize,
				 ArrayType *nodeNameObject, ArrayType *nodePortObject,
				 bool (*FetchTableFunction)(const char *, uint32, const char *))
{
	uint64		shardId = INVALID_SHARD_ID;
	Oid			relationId = InvalidOid;
	List	   *relationNameList = NIL;
	RangeVar   *relation = NULL;
	uint32		nodeIndex = 0;
	bool		tableFetched = false;

	char	   *tableName = text_to_cstring(tableNameText);

	Datum	   *nodeNameArray = DeconstructArrayObject(nodeNameObject);
	Datum	   *nodePortArray = DeconstructArrayObject(nodePortObject);

	int32		nodeNameCount = ArrayObjectCount(nodeNameObject);
	int32		nodePortCount = ArrayObjectCount(nodePortObject);

	/* we should have the same number of node names and port numbers */
	if (nodeNameCount != nodePortCount)
	{
		ereport(ERROR, (errmsg("node name array size: %d and node port array size: %d"
							   " do not match", nodeNameCount, nodePortCount)));
	}

	/*
	 * We lock on the shardId, but do not unlock. When the function returns, and
	 * the transaction for this function commits, this lock will automatically
	 * be released. This ensures that concurrent caching commands will see the
	 * newly created table when they acquire the lock (in read committed mode).
	 */
	shardId = ExtractShardId(tableName);
	LockShardResource(shardId, AccessExclusiveLock);

	/* missing_ok = true: InvalidOid means the table is not cached locally */
	relationNameList = textToQualifiedNameList(tableNameText);
	relation = makeRangeVarFromNameList(relationNameList);
	relationId = RangeVarGetRelid(relation, NoLock, true);

	/* check if we already fetched the table */
	if (relationId != InvalidOid)
	{
		uint64		localTableSize = 0;

		if (!ExpireCachedShards)
		{
			return;
		}

		/*
		 * Check if the cached shard has the same size on disk as it has as on
		 * the placement (is up to date).
		 *
		 * Note 1: performing updates or deletes on the original shard leads to
		 * inconsistent sizes between different databases in which case the data
		 * would be fetched every time, or worse, the placement would get into
		 * a deadlock when it tries to fetch from itself while holding the lock.
		 * Therefore, this option is disabled by default.
		 *
		 * Note 2: when appending data to a shard, the size on disk only
		 * increases when a new page is added (the next 8kB block).
		 */
		localTableSize = LocalTableSize(relationId);

		if (remoteTableSize > localTableSize)
		{
			/* table is not up to date, drop the table */
			ObjectAddress tableObject = {InvalidOid, InvalidOid, 0};

			tableObject.classId = RelationRelationId;
			tableObject.objectId = relationId;
			tableObject.objectSubId = 0;

			performDeletion(&tableObject, DROP_RESTRICT,
							PERFORM_DELETION_INTERNAL);
		}
		else
		{
			/* table is up to date */
			return;
		}
	}

	/* loop until we fetch the table or try all nodes */
	while (!tableFetched && (nodeIndex < nodeNameCount))
	{
		Datum		nodeNameDatum = nodeNameArray[nodeIndex];
		Datum		nodePortDatum = nodePortArray[nodeIndex];
		char	   *nodeName = TextDatumGetCString(nodeNameDatum);
		uint32		nodePort = DatumGetUInt32(nodePortDatum);

		tableFetched = (*FetchTableFunction)(nodeName, nodePort, tableName);

		nodeIndex++;
	}

	/* error out if we tried all nodes and could not fetch the table */
	if (!tableFetched)
	{
		ereport(ERROR, (errmsg("could not fetch relation: \"%s\"",
							   tableName)));
	}
}
/*
 * AlterSequence
 *
 * Modify the definition of a sequence relation (ALTER SEQUENCE).
 *
 * Returns the ObjectAddress of the altered sequence, or InvalidObjectAddress
 * if the sequence does not exist and stmt->missing_ok was set.
 */
ObjectAddress
AlterSequence(AlterSeqStmt *stmt)
{
	Oid			relid;
	SeqTable	elm;			/* sequence-cache entry for this sequence */
	Relation	seqrel;
	Buffer		buf;
	HeapTupleData seqtuple;
	Form_pg_sequence seq;		/* points into the buffer page */
	FormData_pg_sequence new___;	/* local workspace for the updated tuple */
	List	   *owned_by;
	ObjectAddress address;

	/* Open and lock sequence. */
	relid = RangeVarGetRelid(stmt->sequence, AccessShareLock, stmt->missing_ok);
	if (relid == InvalidOid)
	{
		/* only reachable when missing_ok was given: skip with a NOTICE */
		ereport(NOTICE,
				(errmsg("relation \"%s\" does not exist, skipping",
						stmt->sequence->relname)));
		return InvalidObjectAddress;
	}

	init_sequence(relid, &elm, &seqrel);

	/* allow ALTER to sequence owner only */
	if (!pg_class_ownercheck(relid, GetUserId()))
		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
					   stmt->sequence->relname);

	/* lock page' buffer and read tuple into new___ sequence structure */
	seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);

	/* Copy old values of options into workspace */
	memcpy(&new___, seq, sizeof(FormData_pg_sequence));

	/* Check and set new___ values */
	init_params(stmt->options, false, &new___, &owned_by);

	/* Clear local cache so that we don't think we have cached numbers */
	/* Note that we do not change the currval() state */
	elm->cached = elm->last;

	/*
	 * Pre-assign an XID before entering the critical section if we will
	 * WAL-log; check the comment above nextval_internal()'s equivalent call.
	 */
	if (RelationNeedsWAL(seqrel))
		GetTopTransactionId();

	/* Now okay to update the on-disk tuple */
	START_CRIT_SECTION();

	memcpy(seq, &new___, sizeof(FormData_pg_sequence));

	MarkBufferDirty(buf);

	/* XLOG stuff */
	if (RelationNeedsWAL(seqrel))
	{
		xl_seq_rec	xlrec;
		XLogRecPtr	recptr;
		Page		page = BufferGetPage(buf);

		XLogBeginInsert();
		/* REGBUF_WILL_INIT: the whole sequence tuple is re-logged below */
		XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);

		xlrec.node = seqrel->rd_node;
		XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
		XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);

		recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(buf);

	/* process OWNED BY if given */
	if (owned_by)
		process_owned_by(seqrel, owned_by);

	InvokeObjectPostAlterHook(RelationRelationId, relid, 0);

	ObjectAddressSet(address, RelationRelationId, relid);

	/* keep the AccessShareLock until transaction end */
	relation_close(seqrel, NoLock);

	return address;
}
/*
 * Creates a sample table with data from a PXF table.
 * We need to create a copy of the PXF table, in order to pass the sampling
 * parameters pxf_sample_ratio and pxf_max_fragments as attributes,
 * and to create a segment reject limit of 25 percent.
 *
 * The new PXF table is sampled and the results are saved in the returned sample table.
 * Note that ANALYZE can be executed only by the database owner.
 * It is safe to assume that the database owner has permissions to create temp tables.
 * The sampling is done by uniformly sampling pxf_sample_ratio records of each fragments,
 * up to pxf_max_fragments.
 *
 * Input:
 * 	relationOid 	- relation to be sampled
 * 	sampleTableName - sample table name, moderately unique
 * 	lAttributeNames - attributes to be included in the sample
 * 	relTuples		- estimated size of relation
 * 	relFrags		- estimated number of fragments in relation
 * 	requestedSampleSize - as determined by attribute statistics requirements.
 * 	sampleTableRelTuples	- limit on size of the sample.
 * Output:
 * 	sampleTableRelTuples - number of tuples in the sample table created.
 *
 * Returns the OID of the created sample table.
 */
Oid
buildPxfSampleTable(Oid relationOid,
					char *sampleTableName,
					List *lAttributeNames,
					float4 relTuples,
					float4 relFrags,
					float4 requestedSampleSize,
					float4 *sampleTableRelTuples)
{
	const char *schemaName = get_namespace_name(get_rel_namespace(relationOid));	/* must be pfreed */
	const char *tableName = get_rel_name(relationOid);	/* must be pfreed */
	char	   *sampleSchemaName = pstrdup("pg_temp");
	char	   *pxfSampleTable = temporarySampleTableName(relationOid, "pg_analyze_pxf"); /* must be pfreed */
	Oid			sampleTableOid = InvalidOid;
	Oid			pxfSampleTableOid = InvalidOid;
	RangeVar   *rangeVar = NULL;
	float4		pxfSamplingRatio = 0.0;

	Assert(requestedSampleSize > 0.0);
	Assert(relTuples > 0.0);
	Assert(relFrags > 0.0);

	/* calculate pxf_sample_ratio */
	pxfSamplingRatio = calculateSamplingRatio(relTuples, relFrags, requestedSampleSize);

	/*
	 * Build a temp copy of the original pxf table that carries the sampling
	 * parameters as table attributes.
	 */
	buildPxfTableCopy(relationOid,
					  pxfSamplingRatio,
					  pxf_stat_max_fragments,
					  schemaName, tableName,
					  sampleSchemaName, pxfSampleTable);

	/* failOK=true: missing_ok lookup; pxfSampleTableOid may be InvalidOid */
	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, pxfSampleTable, -1);
	pxfSampleTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/);

	/* run the actual sampling query against the PXF copy */
	buildSampleFromPxf(sampleSchemaName, sampleTableName, pxfSampleTable,
					   lAttributeNames, sampleTableRelTuples);

	rangeVar = makeRangeVar(NULL /*catalogname*/, sampleSchemaName, sampleTableName, -1);
	sampleTableOid = RangeVarGetRelid(rangeVar, true /* failOK */, false /*allowHcatalog*/);

	Assert(sampleTableOid != InvalidOid);

	/**
	 * MPP-10723: Very rarely, we may be unlucky and generate an empty sample table. We error out in this case rather than
	 * generate bad statistics.
	 */
	if (*sampleTableRelTuples < 1.0)
	{
		elog(ERROR, "ANALYZE unable to generate accurate statistics on table %s.%s. Try lowering gp_analyze_relative_error",
			 quote_identifier(schemaName),
			 quote_identifier(tableName));
	}

	/* drop the intermediate PXF copy; the sample table itself is returned */
	if (pxfSampleTableOid != InvalidOid)
	{
		elog(DEBUG2, "ANALYZE dropping PXF sample table");
		dropSampleTable(pxfSampleTableOid, true);
	}

	/* NOTE(review): only the second rangeVar is freed here; the first one is
	 * left to the surrounding memory context. */
	pfree((void *) rangeVar);
	pfree((void *) pxfSampleTable);
	pfree((void *) tableName);
	pfree((void *) schemaName);
	pfree((void *) sampleSchemaName);

	return sampleTableOid;
}
/*
 * Executes an ALTER OBJECT / RENAME TO statement. Based on the object
 * type, the function appropriate to that type is executed.
 *
 * On the dispatcher the statement is also forwarded to the segments after
 * local execution.
 */
void
ExecRenameStmt(RenameStmt *stmt)
{
	switch (stmt->renameType)
	{
		case OBJECT_AGGREGATE:
			RenameAggregate(stmt->object, stmt->objarg, stmt->newname);
			break;

		case OBJECT_CONVERSION:
			RenameConversion(stmt->object, stmt->newname);
			break;

		case OBJECT_DATABASE:
			RenameDatabase(stmt->subname, stmt->newname);
			break;

		case OBJECT_EXTPROTOCOL:
			RenameExtProtocol(stmt->subname, stmt->newname);
			break;

		case OBJECT_FUNCTION:
			RenameFunction(stmt->object, stmt->objarg, stmt->newname);
			break;

		case OBJECT_LANGUAGE:
			RenameLanguage(stmt->subname, stmt->newname);
			break;

		case OBJECT_OPCLASS:
			RenameOpClass(stmt->object, stmt->subname, stmt->newname);
			break;

		case OBJECT_OPFAMILY:
			RenameOpFamily(stmt->object, stmt->subname, stmt->newname);
			break;

		case OBJECT_ROLE:
			RenameRole(stmt->subname, stmt->newname);
			break;

		case OBJECT_SCHEMA:
			RenameSchema(stmt->subname, stmt->newname);
			break;

		case OBJECT_TABLESPACE:
			RenameTableSpace(stmt->subname, stmt->newname);
			break;

		case OBJECT_FILESPACE:
			RenameFileSpace(stmt->subname, stmt->newname);
			break;

		case OBJECT_TABLE:
		case OBJECT_SEQUENCE:
		case OBJECT_VIEW:
		case OBJECT_INDEX:
			{
				/*
				 * Declarations hoisted to the top of the block: PostgreSQL
				 * code must stay C89-clean (no declarations after
				 * statements).
				 */
				Oid			namespaceId;
				AclResult	aclresult;

				/*
				 * On the dispatcher, resolve the relation OID ourselves; on
				 * segments stmt->objid arrives pre-resolved with the
				 * dispatched statement.
				 */
				if (Gp_role == GP_ROLE_DISPATCH)
				{
					CheckRelationOwnership(stmt->relation, true);
					stmt->objid = RangeVarGetRelid(stmt->relation, false);
				}

				/*
				 * RENAME TABLE requires that we (still) hold CREATE rights
				 * on the containing namespace, as well as ownership of the
				 * table.
				 */
				namespaceId = get_rel_namespace(stmt->objid);
				aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
												  ACL_CREATE);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
								   get_namespace_name(namespaceId));

				renamerel(stmt->objid, stmt->newname, stmt->renameType, stmt);
				break;
			}

		case OBJECT_COLUMN:
		case OBJECT_TRIGGER:
			{
				Oid			relid;

				CheckRelationOwnership(stmt->relation, true);

				relid = RangeVarGetRelid(stmt->relation, false);

				switch (stmt->renameType)
				{
					case OBJECT_COLUMN:
						renameatt(relid,
								  stmt->subname,	/* old att name */
								  stmt->newname,	/* new att name */
								  interpretInhOption(stmt->relation->inhOpt),	/* recursive? */
								  false);	/* recursing already? */
						break;
					case OBJECT_TRIGGER:
						renametrig(relid,
								   stmt->subname,	/* old trigger name */
								   stmt->newname);	/* new trigger name */
						break;
					default:
						/* can't happen */ ;
				}
				break;
			}

		case OBJECT_TSPARSER:
			RenameTSParser(stmt->object, stmt->newname);
			break;

		case OBJECT_TSDICTIONARY:
			RenameTSDictionary(stmt->object, stmt->newname);
			break;

		case OBJECT_TSTEMPLATE:
			RenameTSTemplate(stmt->object, stmt->newname);
			break;

		case OBJECT_TSCONFIGURATION:
			RenameTSConfiguration(stmt->object, stmt->newname);
			break;

		default:
			elog(ERROR, "unrecognized rename stmt type: %d",
				 (int) stmt->renameType);
	}

	/* forward the (possibly objid-annotated) statement to the segments */
	if (Gp_role == GP_ROLE_DISPATCH)
	{
		CdbDispatchUtilityStatement((Node *) stmt, "ExecRenameStmt");
	}
}
/* * regclassin - converts "classname" to class OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_class entry. */ Datum regclassin(PG_FUNCTION_ARGS) { char *class_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; List *names; /* '-' ? */ if (strcmp(class_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (class_name_or_oid[0] >= '0' && class_name_or_oid[0] <= '9' && strspn(class_name_or_oid, "0123456789") == strlen(class_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(class_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_class for a match. This is needed for initializing * other system catalogs (pg_namespace may not exist yet, and certainly * there are no schemas other than pg_catalog). */ if (IsBootstrapProcessingMode()) { Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyInit(&skey[0], Anum_pg_class_relname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(class_name_or_oid)); hdesc = heap_open(RelationRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, ClassNameNspIndexId, true, NULL, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); else ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("relation \"%s\" does not exist", class_name_or_oid))); /* We assume there can be only one match */ systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches any * pg_class entries in the current search path. */ names = stringToQualifiedNameList(class_name_or_oid); /* We might not even have permissions on this relation; don't lock it. 
*/ result = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, false); PG_RETURN_OID(result); }
/*
 * gp_read_error_log
 *
 * Returns set of error log tuples.
 *
 * Set-returning function: on the dispatcher the call is re-dispatched to all
 * segments and their results are merged; on a segment the local error log
 * file is read directly.
 */
Datum
gp_read_error_log(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	ReadErrorLogContext *context;	/* per-SRF-call state */
	HeapTuple	tuple;
	Datum		result;

	/*
	 * First call setup
	 */
	if (SRF_IS_FIRSTCALL())
	{
		MemoryContext oldcontext;
		FILE	   *fp;
		text	   *relname;

		funcctx = SRF_FIRSTCALL_INIT();

		relname = PG_GETARG_TEXT_P(0);
		/* allocate cross-call state in the multi-call memory context */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		context = palloc0(sizeof(ReadErrorLogContext));
		funcctx->user_fctx = (void *) context;

		funcctx->tuple_desc = BlessTupleDesc(GetErrorTupleDesc());

		/*
		 * Though this function is usually executed on segment, we dispatch
		 * the execution if it happens to be on QD, and combine the results
		 * into one set.
		 */
		if (Gp_role == GP_ROLE_DISPATCH)
		{
			struct CdbPgResults cdb_pgresults = {NULL, 0};
			StringInfoData sql;

			int			i;

			initStringInfo(&sql);
			/*
			 * construct SQL
			 */
			appendStringInfo(&sql,
					"SELECT * FROM pg_catalog.gp_read_error_log(%s) ",
							 quote_literal_internal(text_to_cstring(relname)));

			CdbDispatchCommand(sql.data, DF_WITH_SNAPSHOT, &cdb_pgresults);

			/* validate every per-segment result and count total tuples */
			for (i = 0; i < cdb_pgresults.numResults; i++)
			{
				if (PQresultStatus(cdb_pgresults.pg_results[i]) != PGRES_TUPLES_OK)
				{
					cdbdisp_clearCdbPgResults(&cdb_pgresults);
					elog(ERROR, "unexpected result from segment: %d",
								PQresultStatus(cdb_pgresults.pg_results[i]));
				}
				context->numTuples += PQntuples(cdb_pgresults.pg_results[i]);
			}

			pfree(sql.data);

			/* keep the raw PGresults; rows are emitted in the per-call loop */
			context->segResults = cdb_pgresults.pg_results;
			context->numSegResults = cdb_pgresults.numResults;
		}
		else
		{
			/*
			 * In QE, read the error log.
			 */
			RangeVar   *relrv;
			Oid			relid;

			relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
			relid = RangeVarGetRelid(relrv, true);

			/*
			 * If the relation has gone, silently return no tuples.
			 */
			if (OidIsValid(relid))
			{
				AclResult	aclresult;

				/*
				 * Requires SELECT priv to read error log.
				 */
				aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
				if (aclresult != ACLCHECK_OK)
					aclcheck_error(aclresult, ACL_KIND_CLASS, relrv->relname);

				ErrorLogFileName(context->filename, MyDatabaseId, relid);
				/* may be NULL if no error log file exists for this relation */
				fp = AllocateFile(context->filename, "r");
				context->fp = fp;
			}
		}

		MemoryContextSwitchTo(oldcontext);

		/* on a segment with no log file to read, finish immediately */
		if (Gp_role != GP_ROLE_DISPATCH && !context->fp)
		{
			pfree(context);
			SRF_RETURN_DONE(funcctx);
		}
	}

	funcctx = SRF_PERCALL_SETUP();
	context = (ReadErrorLogContext *) funcctx->user_fctx;

	/*
	 * Read error log, probably on segments. We don't check Gp_role, however,
	 * in case master also wants to read the file.
	 */
	if (context->fp)
	{
		pg_crc32	crc,
					written_crc;

		tuple = ErrorLogRead(context->fp, &written_crc);

		/*
		 * CRC check: recompute over the tuple data and compare with the
		 * checksum stored in the log; a mismatch means a corrupt entry.
		 */
		if (HeapTupleIsValid(tuple))
		{
			INIT_CRC32C(crc);
			COMP_CRC32C(crc, tuple->t_data, tuple->t_len);
			FIN_CRC32C(crc);

			if (!EQ_CRC32C(crc, written_crc))
			{
				/* log and skip the corrupt entry rather than erroring out */
				elog(LOG, "incorrect checksum in error log %s",
						  context->filename);
				tuple = NULL;
			}
		}

		/*
		 * If we found a valid tuple, return it. Otherwise, fall through
		 * in the DONE routine.
		 */
		if (HeapTupleIsValid(tuple))
		{
			/*
			 * We need to set typmod for the executor to understand
			 * its type we just blessed.
			 */
			HeapTupleHeaderSetTypMod(tuple->t_data,
									 funcctx->tuple_desc->tdtypmod);

			result = HeapTupleGetDatum(tuple);
			SRF_RETURN_NEXT(funcctx, result);
		}
	}

	/*
	 * If we got results from dispatch, return all the tuples.
	 * Iterates segment results in order, one row per SRF call.
	 */
	while (context->currentResult < context->numSegResults)
	{
		Datum		values[NUM_ERRORTABLE_ATTR];
		bool		isnull[NUM_ERRORTABLE_ATTR];
		PGresult   *segres = context->segResults[context->currentResult];
		int			row = context->currentRow;

		/* exhausted this segment's rows: advance to the next result set */
		if (row >= PQntuples(segres))
		{
			context->currentRow = 0;
			context->currentResult++;
			continue;
		}
		context->currentRow++;

		MemSet(isnull, false, sizeof(isnull));

		/* convert the textual PGresult fields back into typed Datums */
		values[0] = ResultToDatum(segres, row, 0, timestamptz_in, &isnull[0]);
		values[1] = ResultToDatum(segres, row, 1, textin, &isnull[1]);
		values[2] = ResultToDatum(segres, row, 2, textin, &isnull[2]);
		values[3] = ResultToDatum(segres, row, 3, int4in, &isnull[3]);
		values[4] = ResultToDatum(segres, row, 4, int4in, &isnull[4]);
		values[5] = ResultToDatum(segres, row, 5, textin, &isnull[5]);
		values[6] = ResultToDatum(segres, row, 6, textin, &isnull[6]);
		values[7] = ResultToDatum(segres, row, 7, byteain, &isnull[7]);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, isnull);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}

	/* all rows emitted: release the dispatched results */
	if (context->segResults != NULL)
	{
		int			i;

		for (i = 0; i < context->numSegResults; i++)
			PQclear(context->segResults[i]);

		/* XXX: better to copy to palloc'ed area */
		free(context->segResults);
	}

	/*
	 * Close the file, if we have opened it.
	 */
	if (context->fp != NULL)
	{
		FreeFile(context->fp);
		context->fp = NULL;
	}

	SRF_RETURN_DONE(funcctx);
}