/*
 * cq_bgproc_main
 *
 * Entry point of a continuous query background process.  The Datum argument
 * carries a pointer to the ContQueryProc describing this process.  After
 * connecting to the group's database and lowering its scheduling priority,
 * the process dispatches to the combiner or worker main routine depending on
 * MyContQueryProc->type, and purges its process-level stats once that routine
 * returns.
 */
static void
cq_bgproc_main(Datum arg)
{
    int         base_priority = getpriority(PRIO_PROCESS, MyProcPid);
    int         priority;
    void        (*entry_point) (void);

    MyContQueryProc = (ContQueryProc *) DatumGetPointer(arg);

    BackgroundWorkerUnblockSignals();
    BackgroundWorkerInitializeConnection(NameStr(MyContQueryProc->group->db_name), NULL);

    /* Bail out right away if a cancel request is already pending. */
    CHECK_FOR_INTERRUPTS();

    MyContQueryProc->latch = &MyProc->procLatch;

    ereport(LOG, (errmsg("continuous query process \"%s\" running with pid %d",
                         GetContQueryProcName(MyContQueryProc), MyProcPid)));
    pgstat_report_activity(STATE_RUNNING, GetContQueryProcName(MyContQueryProc));

    /*
     * A BackgroundWorkerHandle slot lies in [0, max_worker_processes) and is
     * unique per running background process, so it doubles as a process ID
     * small enough to fit in any TupleBuffer's waiters Bitmapset.
     */
    MyContQueryProc->id = MyContQueryProc->handle.slot;

    /*
     * Lower our scheduling priority.  Larger values mean lower priority, so
     * the configured fraction is subtracted from MAX_PRIORITY and the result
     * is never allowed to drop below the priority we started with.
     *
     * NOTE(review): nice() applies its argument as an increment to the
     * current nice value; confirm that passing the computed target value
     * here is what is intended when the starting priority is non-zero.
     */
    priority = Max(base_priority,
                   MAX_PRIORITY - ceil(continuous_query_proc_priority * (MAX_PRIORITY - base_priority)));
    priority = nice(priority);

    if (MyContQueryProc->type == Combiner)
    {
        am_cont_combiner = true;
        entry_point = &ContinuousQueryCombinerMain;
    }
    else if (MyContQueryProc->type == Worker)
    {
        am_cont_worker = true;
        entry_point = &ContinuousQueryWorkerMain;
    }
    else
    {
        ereport(ERROR,
                (errmsg("continuous queries can only be run as worker or combiner processes")));
    }

    /* Set up the process-level CQ stats before doing any real work. */
    cq_stat_init(&MyProcCQStats, 0, MyProcPid);

    entry_point();

    /* The main routine returned: drop the process-level CQ stats. */
    cq_stat_send_purge(0, MyProcPid,
                       IsContQueryWorkerProcess() ? CQ_STAT_WORKER : CQ_STAT_COMBINER);
}
/*
 * worker_main
 *
 * Background worker entry point that runs a single job and exits.  The Datum
 * argument is decoded as a uint32 and handed to initialize_worker(); the job
 * is then executed by calling the configured job-run function through SPI
 * inside one transaction.
 *
 * NOTE(review): `job` and `job_run_function` are not declared in this
 * function; presumably initialize_worker() populates `job` -- confirm.
 */
void
worker_main(Datum arg)
{
    int             ret;
    StringInfoData  buf;

    /* Decode the Datum into a uint32 (Datum -> value, not value -> Datum). */
    uint32          segment = DatumGetUInt32(arg);

    /* Setup signal handlers */
    pqsignal(SIGHUP, worker_sighup);
    pqsignal(SIGTERM, worker_sigterm);

    /* Allow signals */
    BackgroundWorkerUnblockSignals();

    initialize_worker(segment);

    /* Connect to the database */
    BackgroundWorkerInitializeConnection(job->datname, job->rolname);

    elog(LOG, "%s initialized running job id %d", MyBgworkerEntry->bgw_name, job->job_id);
    pgstat_report_appname(MyBgworkerEntry->bgw_name);

    /* Initialize the query text */
    initStringInfo(&buf);
    appendStringInfo(&buf,
                     "SELECT * FROM %s.%s(%d, NULL)",
                     job_run_function.schema, job_run_function.name, job->job_id);

    /* Initialize the SPI subsystem */
    SetCurrentStatementStartTimestamp();
    StartTransactionCommand();
    SPI_connect();
    PushActiveSnapshot(GetTransactionSnapshot());
    pgstat_report_activity(STATE_RUNNING, buf.data);

    SetCurrentStatementStartTimestamp();

    /* And run the query */
    ret = SPI_execute(buf.data, true, 0);
    if (ret < 0)
        elog(FATAL, "errors while executing %s", buf.data);

    /* Commit the transaction */
    SPI_finish();
    PopActiveSnapshot();
    CommitTransactionCommand();
    pgstat_report_activity(STATE_IDLE, NULL);

    proc_exit(0);
}
/*
 * BgwPoolMainLoop
 *
 * Main loop of a pool worker.  The argument is a BgwPoolExecutorCtx pointer
 * giving the worker's id and a constructor that returns the pool.  The worker
 * connects to the pool's database and then, forever: waits on the pool's
 * "available" semaphore, copies the next message out of the queue under the
 * pool spinlock, runs the pool's executor on the copy, and finally decrements
 * the active counter.
 */
static void
BgwPoolMainLoop(Datum arg)
{
    BgwPoolExecutorCtx *exec_ctx = (BgwPoolExecutorCtx*)arg;
    int         worker_id = exec_ctx->id;
    BgwPool    *pool = exec_ctx->constructor();
    int         msg_size;
    void       *payload;

    BackgroundWorkerUnblockSignals();
    BackgroundWorkerInitializeConnection(pool->dbname, NULL);

    for (;;)
    {
        PGSemaphoreLock(&pool->available);

        SpinLockAcquire(&pool->lock);

        /* The int stored at the queue head is the length of the message. */
        msg_size = *(int*)&pool->queue[pool->head];
        Assert(msg_size < pool->size);

        /*
         * NOTE(review): the result of malloc() is not checked, and the
         * allocation happens while the spinlock is held.
         */
        payload = malloc(msg_size);

        pool->pending -= 1;
        pool->active += 1;
        if (pool->lastPeakTime == 0 && pool->active == pool->nWorkers && pool->pending != 0)
            pool->lastPeakTime = MtmGetSystemTime();

        if (pool->head + msg_size + 4 <= pool->size)
        {
            /* Message body follows its 4-byte length at the current head. */
            memcpy(payload, &pool->queue[pool->head+4], msg_size);
            pool->head += 4 + INTALIGN(msg_size);
        }
        else
        {
            /* Body does not fit after the head: it is read from the queue start. */
            memcpy(payload, pool->queue, msg_size);
            pool->head = INTALIGN(msg_size);
        }

        /* Wrap the head when it lands exactly on the end of the queue. */
        if (pool->size == pool->head)
            pool->head = 0;

        /* Space was freed: release a producer waiting on the overflow semaphore. */
        if (pool->producerBlocked)
        {
            pool->producerBlocked = false;
            PGSemaphoreUnlock(&pool->overflow);
            pool->lastPeakTime = 0;
        }

        SpinLockRelease(&pool->lock);

        /* Run the job on our private copy, outside the lock. */
        pool->executor(worker_id, payload, msg_size);
        free(payload);

        SpinLockAcquire(&pool->lock);
        pool->active -= 1;
        pool->lastPeakTime = 0;
        SpinLockRelease(&pool->lock);
    }
}
/*
 * config_log_main
 *
 * Background worker entry point.  Connects to config_log_database, obtains
 * the objects handle from initialize_objects(), and then sleeps on the
 * process latch until SIGTERM.  Each SIGHUP reloads the configuration file
 * and calls execute_pg_settings_logger() with the objects handle.
 */
static void
config_log_main(Datum main_arg)
{
    config_log_objects *log_objects;

    pqsignal(SIGTERM, config_log_sigterm);
    pqsignal(SIGHUP, config_log_sighup);

    /* Handlers are installed, so signals may now be delivered. */
    BackgroundWorkerUnblockSignals();

    BackgroundWorkerInitializeConnection(config_log_database, NULL);

    /* Verify expected objects exist */
    log_objects = initialize_objects();

    for (;;)
    {
        int         wake_events;

        if (got_sigterm)
            break;

        wake_events = WaitLatch(&MyProc->procLatch,
                                WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                                100000L);
        ResetLatch(&MyProc->procLatch);

        /* Nothing sensible can be done once the postmaster is gone. */
        if (wake_events & WL_POSTMASTER_DEATH)
            proc_exit(1);

        /* Only a SIGHUP triggers any work. */
        if (!got_sighup)
            continue;

        got_sighup = false;
        ProcessConfigFile(PGC_SIGHUP);
        execute_pg_settings_logger(log_objects);
    }

    proc_exit(0);
}
/*
 * worker_spi_main
 *
 * Background worker entry point.  The Datum argument is an int32 index used
 * to derive the schema name "schema<index>"; the table is always "counted".
 * After connecting to the "postgres" database and calling
 * initialize_worker_spi(), the worker loops until SIGTERM: it sleeps for
 * worker_spi_naptime seconds (or until its latch is set), then runs one
 * statement that deletes the 'delta' rows, adds their sum to the 'total'
 * row, and logs the new total.
 */
void
worker_spi_main(Datum main_arg)
{
    int             worker_index = DatumGetInt32(main_arg);
    worktable      *table;
    StringInfoData  query;
    char            schema_name[20];

    table = palloc(sizeof(worktable));
    sprintf(schema_name, "schema%d", worker_index);
    table->schema = pstrdup(schema_name);
    table->name = pstrdup("counted");

    /* Handlers must be in place before signals are unblocked. */
    pqsignal(SIGHUP, worker_spi_sighup);
    pqsignal(SIGTERM, worker_spi_sigterm);
    BackgroundWorkerUnblockSignals();

    BackgroundWorkerInitializeConnection("postgres", NULL);

    elog(LOG, "%s initialized with %s.%s",
         MyBgworkerEntry->bgw_name, table->schema, table->name);
    initialize_worker_spi(table);

    /*
     * Quote the identifiers only now: initialize_worker_spi() is called with
     * the unquoted names.  The unquoted copies are not freed here.
     */
    table->schema = quote_identifier(table->schema);
    table->name = quote_identifier(table->name);

    initStringInfo(&query);
    appendStringInfo(&query,
                     "WITH deleted AS (DELETE "
                     "FROM %s.%s "
                     "WHERE type = 'delta' RETURNING value), "
                     "total AS (SELECT coalesce(sum(value), 0) as sum "
                     "FROM deleted) "
                     "UPDATE %s.%s "
                     "SET value = %s.value + total.sum "
                     "FROM total WHERE type = 'total' "
                     "RETURNING %s.value",
                     table->schema, table->name,
                     table->schema, table->name,
                     table->name,
                     table->name);

    /* Keep going until the SIGTERM handler asks us to stop. */
    for (;;)
    {
        int         spi_rc;
        int         wake_events;

        if (got_sigterm)
            break;

        /*
         * Sleep on the process latch rather than with usleep(): the latch
         * wait is interrupted when the postmaster dies, so the worker can
         * exit promptly in that case.
         */
        wake_events = WaitLatch(&MyProc->procLatch,
                                WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                                worker_spi_naptime * 1000L);
        ResetLatch(&MyProc->procLatch);

        if (wake_events & WL_POSTMASTER_DEATH)
            proc_exit(1);

        /* SIGHUP: re-read the configuration file. */
        if (got_sighup)
        {
            got_sighup = false;
            ProcessConfigFile(PGC_SIGHUP);
        }

        /*
         * Open a transaction for the query.  The statement start timestamp
         * is set first so that both statement and transaction start times
         * are current; SPI_connect() gives access to the SPI manager, the
         * pushed snapshot supplies MVCC visibility for the query, and
         * pgstat_report_activity() publishes what we are doing.
         */
        SetCurrentStatementStartTimestamp();
        StartTransactionCommand();
        SPI_connect();
        PushActiveSnapshot(GetTransactionSnapshot());
        pgstat_report_activity(STATE_RUNNING, query.data);

        spi_rc = SPI_execute(query.data, false, 0);
        if (spi_rc != SPI_OK_UPDATE_RETURNING)
            elog(FATAL, "cannot select from table %s.%s: error code %d",
                 table->schema, table->name, spi_rc);

        if (SPI_processed > 0)
        {
            bool        isnull;
            int32       total;

            total = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
                                                SPI_tuptable->tupdesc,
                                                1, &isnull));
            if (!isnull)
                elog(LOG, "%s: count in %s.%s is now %d",
                     MyBgworkerEntry->bgw_name,
                     table->schema, table->name, total);
        }

        /* Close SPI, drop the snapshot and commit. */
        SPI_finish();
        PopActiveSnapshot();
        CommitTransactionCommand();
        pgstat_report_activity(STATE_IDLE, NULL);
    }

    proc_exit(1);
}
/*
 * kafka_consume_main
 *
 * Main function for Kafka consumers running as background workers.
 *
 * The Datum argument is the key of this consumer's entry in consumer_procs.
 * The worker loads the saved consumer state, starts consuming every topic
 * partition whose id modulo consumer.parallelism equals this process's
 * partition_group, and then loops until SIGTERM: each iteration pulls one
 * batch per assigned partition, feeds all buffered payloads to a single COPY,
 * and saves the consumer state in the same transaction.
 */
void
kafka_consume_main(Datum arg)
{
    char        err_msg[512];
    rd_kafka_topic_conf_t *topic_conf;
    rd_kafka_t *kafka;
    rd_kafka_topic_t *topic;
    rd_kafka_message_t **messages;
    const struct rd_kafka_metadata *meta;
    struct rd_kafka_metadata_topic topic_meta;
    rd_kafka_resp_err_t err;
    bool        found;
    Oid         id = (Oid) arg;
    ListCell   *lc;
    KafkaConsumerProc *proc = hash_search(consumer_procs, &id, HASH_FIND, &found);
    KafkaConsumer consumer;
    CopyStmt   *copy;
    int         valid_brokers = 0;
    int         i;
    int         my_partitions = 0;

    if (!found)
        elog(ERROR, "kafka consumer %u not found", id);

    pqsignal(SIGTERM, kafka_consume_main_sigterm);
#define BACKTRACE_SEGFAULTS
#ifdef BACKTRACE_SEGFAULTS
    pqsignal(SIGSEGV, debug_segfault);
#endif

    /* we're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    /* give this proc access to the database */
    BackgroundWorkerInitializeConnection(NameStr(proc->dbname), NULL);

    /* load saved consumer state */
    StartTransactionCommand();
    load_consumer_state(proc->consumer_id, &consumer);
    copy = get_copy_statement(&consumer);

    topic_conf = rd_kafka_topic_conf_new();
    kafka = rd_kafka_new(RD_KAFKA_CONSUMER, NULL, err_msg, sizeof(err_msg));

    /* rd_kafka_new() reports failure by returning NULL and filling err_msg */
    if (kafka == NULL)
        elog(ERROR, "failed to create kafka handle: %s", err_msg);

    rd_kafka_set_logger(kafka, logger);

    /*
     * Add all brokers currently in pipeline_kafka_brokers
     */
    if (consumer.brokers == NIL)
        elog(ERROR, "no valid brokers were found");

    foreach(lc, consumer.brokers)
        valid_brokers += rd_kafka_brokers_add(kafka, lfirst(lc));

    if (!valid_brokers)
        elog(ERROR, "no valid brokers were found");

    /*
     * Set up our topic to read from
     */
    topic = rd_kafka_topic_new(kafka, consumer.topic, topic_conf);
    err = rd_kafka_metadata(kafka, false, topic, &meta, CONSUMER_TIMEOUT);

    if (err != RD_KAFKA_RESP_ERR_NO_ERROR)
        elog(ERROR, "failed to acquire metadata: %s", rd_kafka_err2str(err));

    Assert(meta->topic_cnt == 1);
    topic_meta = meta->topics[0];

    load_consumer_offsets(&consumer, &topic_meta, proc->offset);
    CommitTransactionCommand();

    /*
     * Begin consuming all partitions that this process is responsible for
     */
    for (i = 0; i < topic_meta.partition_cnt; i++)
    {
        int         partition = topic_meta.partitions[i].id;

        /*
         * The consume loop below visits partitions [0, num_partitions), and
         * consumer.offsets is indexed by partition, so the id must be
         * strictly below num_partitions.
         */
        Assert(partition < consumer.num_partitions);

        if (partition % consumer.parallelism != proc->partition_group)
            continue;

        elog(LOG, "[kafka consumer] %s <- %s consuming partition %d from offset %ld",
             consumer.rel->relname, consumer.topic, partition, consumer.offsets[partition]);

        if (rd_kafka_consume_start(topic, partition, consumer.offsets[partition]) == -1)
            elog(ERROR, "failed to start consuming: %s", rd_kafka_err2str(rd_kafka_errno2err(errno)));

        my_partitions++;
    }

    /*
     * No point doing anything if we don't have any partitions assigned to us
     */
    if (my_partitions == 0)
    {
        elog(LOG, "[kafka consumer] %s <- %s consumer %d doesn't have any partitions to read from",
             consumer.rel->relname, consumer.topic, MyProcPid);
        goto done;
    }

    /* rd_kafka_consume_batch() fills an array of message pointers */
    messages = palloc0(sizeof(rd_kafka_message_t *) * consumer.batch_size);

    /*
     * Consume messages until we are terminated
     */
    while (!got_sigterm)
    {
        ssize_t     num_consumed;
        int         msg_idx;
        int         messages_buffered = 0;
        int         partition;
        StringInfoData buf;
        bool        xact = false;

        for (partition = 0; partition < consumer.num_partitions; partition++)
        {
            if (partition % consumer.parallelism != proc->partition_group)
                continue;

            num_consumed = rd_kafka_consume_batch(topic, partition,
                                                  CONSUMER_TIMEOUT, messages, consumer.batch_size);

            if (num_consumed <= 0)
                continue;

            if (!xact)
            {
                StartTransactionCommand();

                /*
                 * Create the COPY buffer exactly once per batch, so rows
                 * buffered from one partition are not thrown away when a
                 * later partition also returns messages.
                 */
                initStringInfo(&buf);
                xact = true;
            }

            for (msg_idx = 0; msg_idx < num_consumed; msg_idx++)
            {
                if (messages[msg_idx]->payload != NULL)
                {
                    appendBinaryStringInfo(&buf, messages[msg_idx]->payload, messages[msg_idx]->len);

                    /* COPY input is line oriented: terminate every payload */
                    if (buf.len > 0 && buf.data[buf.len - 1] != '\n')
                        appendStringInfoChar(&buf, '\n');
                    messages_buffered++;
                }
                consumer.offsets[partition] = messages[msg_idx]->offset;
                rd_kafka_message_destroy(messages[msg_idx]);
            }
        }

        /* Nothing arrived on any partition: nap briefly and poll again */
        if (!xact)
        {
            pg_usleep(1 * 1000);
            continue;
        }

        /* we don't want to die in the event of any errors */
        PG_TRY();
        {
            if (messages_buffered)
                execute_copy(copy, &buf);
        }
        PG_CATCH();
        {
            /* Report how many messages the whole batch held, not just the last partition */
            elog(LOG, "[kafka consumer] %s <- %s failed to process batch, dropped %d message%s:",
                 consumer.rel->relname, consumer.topic, messages_buffered,
                 (messages_buffered == 1 ? "" : "s"));
            EmitErrorReport();
            FlushErrorState();

            AbortCurrentTransaction();
            xact = false;
        }
        PG_END_TRY();

        /* The failed transaction was aborted above; open a new one for the state update */
        if (!xact)
            StartTransactionCommand();

        if (messages_buffered)
            save_consumer_state(&consumer, proc->partition_group);

        CommitTransactionCommand();
    }

done:

    hash_search(consumer_procs, &id, HASH_REMOVE, NULL);

    rd_kafka_topic_destroy(topic);
    rd_kafka_destroy(kafka);
    rd_kafka_wait_destroyed(CONSUMER_TIMEOUT);
}
/*
 * worker_test_main
 *
 * Background worker entry point for the shared-memory test.  The Datum
 * argument is the handle of a dynamic shared memory segment whose start is a
 * test_shm_mq_header.  The worker attaches to the segment, bumps the ready
 * and attached counters under the header's mutex, sets the latch of the
 * backend that registered it, connects to the database named in the header,
 * and then detaches and exits.
 */
void
worker_test_main(Datum main_arg)
{
    dsm_segment *seg;
    volatile test_shm_mq_header *hdr;
    PGPROC     *registrant;

    pqsignal(SIGHUP, handle_sighup);
    pqsignal(SIGTERM, handle_sigterm);
    BackgroundWorkerUnblockSignals();

    printf("worker_test_main: %d\n", DatumGetInt32(main_arg));

    /* A resource owner is created before attaching to the segment. */
    CurrentResourceOwner = ResourceOwnerCreate(NULL, "worker test");

    seg = dsm_attach(DatumGetInt32(main_arg));
    if (seg == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("unable to map dynamic shared memory segment")));

    hdr = dsm_segment_address(seg);

    /* Start: record in the shared header that this worker is attached and ready */
    SpinLockAcquire(&hdr->mutex);
    hdr->workers_ready++;
    hdr->workers_attached++;
    SpinLockRelease(&hdr->mutex);

    /* Wake the backend that registered us, if it is still around. */
    registrant = BackendPidGetProc(MyBgworkerEntry->bgw_notify_pid);
    if (registrant == NULL)
    {
        elog(DEBUG1, "registrant backend has exited prematurely");
        proc_exit(1);
    }
    SetLatch(&registrant->procLatch);

    /* Do the work */
    BackgroundWorkerInitializeConnection(hdr->dbname, NULL);

    /* Print the mapped address of the segment (not the address of the function). */
    printf("DSM: %p\n", dsm_segment_address(seg));

#if 0
    SetCurrentStatementStartTimestamp();
    StartTransactionCommand();
    SPI_connect();
    PushActiveSnapshot(GetTransactionSnapshot());
    pgstat_report_activity(STATE_RUNNING, "initializing spi_worker schema");

    SPI_finish();
    PopActiveSnapshot();
    CommitTransactionCommand();
    pgstat_report_activity(STATE_IDLE, NULL);
#endif

    dsm_detach(seg);

    proc_exit(0);
}
/*
 * PgOctopusWorkerMain is the main entry-point for the background worker
 * that performs health checks.
 *
 * Each round loads the node health list, builds and runs the health checks,
 * resets the per-round memory context, and then waits on the latch for
 * whatever remains of HealthCheckPeriod.  SIGHUP reloads the configuration;
 * SIGTERM ends the loop.
 */
static void
PgOctopusWorkerMain(Datum arg)
{
    MemoryContext healthCheckContext = NULL;

    /* Establish signal handlers before unblocking signals. */
    pqsignal(SIGHUP, pg_octopus_sighup);
    pqsignal(SIGINT, SIG_IGN);
    pqsignal(SIGTERM, pg_octopus_sigterm);

    /* We're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    /* Connect to our database */
    BackgroundWorkerInitializeConnection("postgres", NULL);

    /* All per-round allocations go here so one reset frees them. */
    healthCheckContext = AllocSetContextCreate(CurrentMemoryContext,
                                               "Health check context",
                                               ALLOCSET_DEFAULT_MINSIZE,
                                               ALLOCSET_DEFAULT_INITSIZE,
                                               ALLOCSET_DEFAULT_MAXSIZE);

    MemoryContextSwitchTo(healthCheckContext);

    elog(LOG, "pg_octopus monitor started");

    /*
     * Main loop: do this until the SIGTERM handler tells us to terminate
     */
    while (!got_sigterm)
    {
        struct timeval currentTime = {0, 0};
        struct timeval roundEndTime = {0, 0};
        struct timeval timeout = {0, 0};
        List       *nodeHealthList = NIL;
        List       *healthCheckList = NIL;

        /* Compute when this round is supposed to end. */
        gettimeofday(&currentTime, NULL);
        roundEndTime = AddTimeMillis(currentTime, HealthCheckPeriod);

        nodeHealthList = LoadNodeHealthList();
        healthCheckList = CreateHealthChecks(nodeHealthList);

        DoHealthChecks(healthCheckList);

        MemoryContextReset(healthCheckContext);

        /* Wait for the remainder of the round, if any time is left. */
        gettimeofday(&currentTime, NULL);
        timeout = SubtractTimes(roundEndTime, currentTime);

        if (timeout.tv_sec >= 0 && timeout.tv_usec >= 0)
        {
            LatchWait(timeout);
        }

        if (got_sighup)
        {
            got_sighup = false;
            ProcessConfigFile(PGC_SIGHUP);
        }
    }

    elog(LOG, "pg_octopus monitor exiting");

    proc_exit(0);
}
/*
 * kill_idle_main
 *
 * Background worker entry point.  Connects to the "postgres" database and,
 * every kill_max_idle_time seconds (or whenever its latch is set), runs the
 * query produced by kill_idle_build_query() and logs one line per returned
 * row.  SIGHUP reloads the configuration and rebuilds the query when
 * kill_max_idle_time changed; SIGTERM makes the worker exit with status 0.
 *
 * Result columns 3, 4 and 5 are logged as user name, database name and
 * client address; column 1 is read as an int32 PID.
 */
static void
kill_idle_main(Datum main_arg)
{
    StringInfoData buf;

    /* Register functions for SIGTERM/SIGHUP management */
    pqsignal(SIGHUP, kill_idle_sighup);
    pqsignal(SIGTERM, kill_idle_sigterm);

    /* We're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    /* Connect to a database */
    BackgroundWorkerInitializeConnection("postgres", NULL);

    /* Build query for process */
    initStringInfo(&buf);
    kill_idle_build_query(&buf);

    while (!got_sigterm)
    {
        int         rc,
                    ret,
                    i;

        /* Wait necessary amount of time */
        rc = WaitLatch(&MyProc->procLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       kill_max_idle_time * 1000L,
                       PG_WAIT_EXTENSION);
        ResetLatch(&MyProc->procLatch);

        /* Emergency bailout if postmaster has died */
        if (rc & WL_POSTMASTER_DEATH)
            proc_exit(1);

        /* Process signals */
        if (got_sighup)
        {
            int         old_interval;

            /*
             * Clear the flag before reloading, so a SIGHUP that arrives
             * during the reload is not lost.
             */
            got_sighup = false;

            /* Save old value of kill interval */
            old_interval = kill_max_idle_time;

            /* Process config file */
            ProcessConfigFile(PGC_SIGHUP);
            ereport(LOG, (errmsg("bgworker kill_idle signal: processed SIGHUP")));

            /*
             * Rebuild query if necessary.  Emptying the existing buffer is
             * enough; re-initializing it would leak the old allocation.
             */
            if (old_interval != kill_max_idle_time)
            {
                resetStringInfo(&buf);
                kill_idle_build_query(&buf);
            }
        }

        if (got_sigterm)
        {
            /* Simply exit */
            ereport(LOG, (errmsg("bgworker kill_idle signal: processed SIGTERM")));
            proc_exit(0);
        }

        /* Process idle connection kill */
        SetCurrentStatementStartTimestamp();
        StartTransactionCommand();
        SPI_connect();
        PushActiveSnapshot(GetTransactionSnapshot());
        pgstat_report_activity(STATE_RUNNING, buf.data);

        /* Statement start time */
        SetCurrentStatementStartTimestamp();

        /* Execute query */
        ret = SPI_execute(buf.data, false, 0);

        /* Some error handling */
        if (ret != SPI_OK_SELECT)
            elog(FATAL, "Error when trying to kill idle connections");

        /* Do some processing and log stuff disconnected */
        for (i = 0; i < SPI_processed; i++)
        {
            int32       pidValue;
            bool        isnull;
            char       *datname = NULL;
            char       *usename = NULL;
            char       *client_addr = NULL;

            /* Fetch values */
            pidValue = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i],
                                                   SPI_tuptable->tupdesc,
                                                   1, &isnull));

            /*
             * SPI_getvalue() runs each column's output function, so it gives
             * a proper C string whatever the column type is, and NULL for
             * SQL NULLs.
             */
            usename = SPI_getvalue(SPI_tuptable->vals[i],
                                   SPI_tuptable->tupdesc, 3);
            datname = SPI_getvalue(SPI_tuptable->vals[i],
                                   SPI_tuptable->tupdesc, 4);
            client_addr = SPI_getvalue(SPI_tuptable->vals[i],
                                       SPI_tuptable->tupdesc, 5);

            /* Log what has been disconnected */
            elog(LOG, "Disconnected idle connection: PID %d %s/%s/%s",
                 pidValue,
                 datname ? datname : "none",
                 usename ? usename : "none",
                 client_addr ? client_addr : "none");
        }

        SPI_finish();
        PopActiveSnapshot();
        CommitTransactionCommand();
        pgstat_report_activity(STATE_IDLE, NULL);
    }

    /* No problems, so clean exit */
    proc_exit(0);
}
/*
 * worker_spi_main
 *
 * Background worker entry point.  Connects to the "postgres" database and,
 * once per second (or whenever its latch is set), counts the rows of
 * pg_class in a read-only SPI query and logs the result.  The loop ends when
 * the SIGTERM handler sets got_sigterm.
 */
static void
worker_spi_main(Datum main_arg)
{
    /* Register functions for SIGTERM/SIGHUP management */
    pqsignal(SIGHUP, worker_spi_sighup);
    pqsignal(SIGTERM, worker_spi_sigterm);

    /* We're now ready to receive signals */
    BackgroundWorkerUnblockSignals();

    /* Connect to our database */
    BackgroundWorkerInitializeConnection("postgres", NULL);

    while (!got_sigterm)
    {
        int         ret;
        int         rc;
        StringInfoData buf;

        /*
         * Background workers mustn't call usleep() or any direct equivalent:
         * instead, they may wait on their process latch, which sleeps as
         * necessary, but is awakened if postmaster dies.  That way the
         * background process goes away immediately in an emergency.
         */
        rc = WaitLatch(&MyProc->procLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                       1000L);
        ResetLatch(&MyProc->procLatch);

        /* emergency bailout if postmaster has died */
        if (rc & WL_POSTMASTER_DEATH)
            proc_exit(1);

        StartTransactionCommand();
        SPI_connect();
        PushActiveSnapshot(GetTransactionSnapshot());

        /* The buffer is created after the transaction has been started. */
        initStringInfo(&buf);

        /* Build the query string */
        appendStringInfo(&buf, "SELECT count(*) FROM pg_class;");

        ret = SPI_execute(buf.data, true, 0);

        /* Some error messages in case of incorrect handling */
        if (ret != SPI_OK_SELECT)
            elog(FATAL, "SPI_execute failed: error code %d", ret);

        if (SPI_processed > 0)
        {
            long long   count;
            bool        isnull;

            /* count(*) yields a bigint, so it must be read as a 64-bit value */
            count = (long long) DatumGetInt64(SPI_getbinval(SPI_tuptable->vals[0],
                                                            SPI_tuptable->tupdesc,
                                                            1, &isnull));
            if (!isnull)
                elog(LOG, "Currently %lld relations in database", count);
        }

        SPI_finish();
        PopActiveSnapshot();
        CommitTransactionCommand();
    }

    proc_exit(0);
}
/*
 * wed_worker_main
 *
 * Worker logic: connect to wed_worker_db_name and, every wed_worker_naptime
 * seconds (or whenever the latch is set), run "SELECT trcheck()" through SPI
 * in its own transaction.  SIGHUP reloads the configuration file; SIGTERM
 * ends the loop.
 */
void
wed_worker_main(Datum main_arg)
{
    StringInfoData query;

    /* Install the handlers first, then let signals through. */
    pqsignal(SIGHUP, wed_worker_sighup);
    pqsignal(SIGTERM, wed_worker_sigterm);
    BackgroundWorkerUnblockSignals();

    BackgroundWorkerInitializeConnection(wed_worker_db_name, NULL);

    elog(LOG, "%s initialized in: %s",
         MyBgworkerEntry->bgw_name, wed_worker_db_name);

    initStringInfo(&query);
    appendStringInfo(&query, "SELECT trcheck()");

    /* Run until the SIGTERM handler raises got_sigterm. */
    for (;;)
    {
        int         spi_rc;
        int         wake_events;

        if (got_sigterm)
            break;

        /*
         * Sleep on the process latch instead of usleep(): the wait also ends
         * when the postmaster dies, which lets the worker exit at once.
         */
        wake_events = WaitLatch(&MyProc->procLatch,
                                WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                                wed_worker_naptime * 1000L);
        ResetLatch(&MyProc->procLatch);

        if (wake_events & WL_POSTMASTER_DEATH)
            proc_exit(1);

        /* A SIGHUP only asks for a configuration reload. */
        if (got_sighup)
        {
            got_sighup = false;
            ProcessConfigFile(PGC_SIGHUP);
        }

        /*
         * Set the statement start timestamp, open a transaction, connect to
         * SPI and push an active snapshot so the query has MVCC data to work
         * on; then report the running query through pgstat.
         */
        SetCurrentStatementStartTimestamp();
        StartTransactionCommand();
        SPI_connect();
        PushActiveSnapshot(GetTransactionSnapshot());
        pgstat_report_activity(STATE_RUNNING, query.data);

        spi_rc = SPI_execute(query.data, false, 0);
        if (spi_rc != SPI_OK_SELECT)
            elog(FATAL, "stored procedure trcheck() not found: error code %d", spi_rc);

        elog(LOG, "%s : trcheck() done !", MyBgworkerEntry->bgw_name);

        /* Wrap up: close SPI, pop the snapshot, commit, report idle. */
        SPI_finish();
        PopActiveSnapshot();
        CommitTransactionCommand();
        pgstat_report_activity(STATE_IDLE, NULL);
    }

    proc_exit(1);
}
/*
 * Entry point for pg_keeper.
 *
 * Picks the initial mode from RecoveryInProgress() (standby while in
 * recovery, master otherwise) and runs the matching main routine.  When the
 * standby routine returns non-zero, the master and standby connection
 * settings are swapped and the master routine is run in the same process.
 * The process exits with the value returned by the last routine that ran.
 */
void
KeeperMain(Datum main_arg)
{
    int         ret;

    /* Sanity check */
    checkParameter();

    /* Node 1 starts out as master, node 2 as standby. */
    KeeperMaster = keeper_node1_conninfo;
    KeeperStandby = keeper_node2_conninfo;

    /* Decide which role this process plays. */
    if (RecoveryInProgress())
        current_mode = KEEPER_STANDBY_MODE;
    else
        current_mode = KEEPER_MASTER_MODE;

    /* Handlers go in before signals are unblocked. */
    pqsignal(SIGHUP, pg_keeper_sighup);
    pqsignal(SIGTERM, pg_keeper_sigterm);
    BackgroundWorkerUnblockSignals();

    BackgroundWorkerInitializeConnection("postgres", NULL);

    for (;;)
    {
        if (current_mode == KEEPER_MASTER_MODE)
        {
            /* Routine for master_mode */
            setupKeeperMaster();
            ret = KeeperMainMaster();
            break;
        }
        else if (current_mode == KEEPER_STANDBY_MODE)
        {
            /* Routine for standby_mode */
            setupKeeperStandby();
            ret = KeeperMainStandby();

            /* Zero result: nothing more to do, leave with that status. */
            if (!ret)
                break;

            /*
             * Non-zero result: carry on as master in this same process
             * instead of requiring pg_keeper to be started again.
             */
            current_mode = KEEPER_MASTER_MODE;

            /* Switch master and standby connection information */
            swtichMasterAndStandby();

            ereport(LOG,
                    (errmsg("swtiched master and standby informations"),
                     errdetail("\"%s\" is regarded as master server, \"%s\" is regarded as standby server",
                               KeeperMaster, KeeperStandby)));
        }
        else
        {
            ereport(ERROR,
                    (errmsg("invalid keeper mode : \"%d\"", current_mode)));
            break;
        }
    }

    proc_exit(ret);
}
/*
 * powa_main
 *
 * Background worker entry point for the POWA collector.  Exits immediately
 * when powa_frequency is negative; otherwise connects to powa_database, sets
 * the application name, and then repeatedly runs powa_take_snapshot(),
 * sleeping on the process latch for whatever is left of powa_frequency
 * milliseconds after each snapshot.  The loop ends on SIGTERM, and the worker
 * exits at once if the postmaster dies while it is waiting.
 */
static void
powa_main(Datum main_arg)
{
    char       *q1 = "SELECT powa_take_snapshot()";
    static char *q2 = "SET application_name = 'POWA collector'";
    instr_time  begin;
    instr_time  end;
    long        time_to_wait;

    die_on_too_small_frequency();

    /* Set up signal handlers, then unblock signals */
    pqsignal(SIGHUP, powa_sighup);
    pqsignal(SIGTERM, powa_sigterm);

    BackgroundWorkerUnblockSignals();

    /* A negative powa_frequency means powa has been deactivated: do not connect */
    if (powa_frequency < 0)
    {
        elog(LOG, "POWA is deactivated (powa.frequency = %i), exiting", powa_frequency);
        proc_exit(1);
    }

    /* Connect to POWA database */
    BackgroundWorkerInitializeConnection(powa_database, NULL);

    elog(LOG, "POWA connected to %s", powa_database);

    /* Tag this session so it is recognizable in the activity views */
    StartTransactionCommand();
    SetCurrentStatementStartTimestamp();
    SPI_connect();
    PushActiveSnapshot(GetTransactionSnapshot());
    SPI_execute(q2, false, 0);
    SPI_finish();
    PopActiveSnapshot();
    CommitTransactionCommand();

    while (!got_sigterm)
    {
        /*
         * We can get here with a new value of powa_frequency because of a
         * reload.  Exit to disconnect if this value is negative.
         */
        if (powa_frequency < 0)
        {
            elog(LOG, "POWA exits to disconnect from the database now");
            proc_exit(1);
        }

        /* Remember when this snapshot started, to keep the interval stable */
        INSTR_TIME_SET_CURRENT(begin);
        ResetLatch(&MyProc->procLatch);

        StartTransactionCommand();
        SetCurrentStatementStartTimestamp();
        SPI_connect();
        PushActiveSnapshot(GetTransactionSnapshot());
        SPI_execute(q1, false, 0);
        SPI_finish();
        PopActiveSnapshot();
        CommitTransactionCommand();

        INSTR_TIME_SET_CURRENT(end);
        INSTR_TIME_SUBTRACT(end, begin);

        /*
         * Wait powa.frequency, compensating for the time the snapshot took.
         * If we are already behind schedule, take the next snapshot right
         * away.
         */
        time_to_wait = powa_frequency - INSTR_TIME_GET_MILLISEC(end);
        if (time_to_wait > 0)
        {
            int         rc;

            rc = WaitLatch(&MyProc->procLatch,
                           WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                           time_to_wait);

            /* emergency bailout if postmaster has died */
            if (rc & WL_POSTMASTER_DEATH)
                proc_exit(1);
        }
    }

    proc_exit(0);
}
static void SaveBuffers(void) { int i; int num_buffers; int log_level = DEBUG3; SavedBuffer *saved_buffers; volatile BufferDesc *bufHdr; // XXX: Do we really need volatile here? FILE *file = NULL; int database_counter= 0; Oid prev_database = InvalidOid; Oid prev_filenode = InvalidOid; ForkNumber prev_forknum = InvalidForkNumber; BlockNumber prev_blocknum = InvalidBlockNumber; BlockNumber range_counter = 0; const char *savefile_path; /* * XXX: If the memory request fails, ask for a smaller memory chunk, and use * it to create chunks of save-files, and make the workers read those chunks. * * This is not a concern as of now, so deferred; there's at least one other * place that allocates (NBuffers * (much_bigger_struct)), so this seems to * be an acceptable practice. */ saved_buffers = (SavedBuffer *) palloc(sizeof(SavedBuffer) * NBuffers); /* Lock the buffer partitions for reading. */ for (i = 0; i < NUM_BUFFER_PARTITIONS; ++i) LWLockAcquire(FirstBufMappingLock + i, LW_SHARED); /* Scan and save a list of valid buffers. */ for (num_buffers = 0, i = 0, bufHdr = BufferDescriptors; i < NBuffers; ++i, ++bufHdr) { /* Lock each buffer header before inspecting. */ LockBufHdr(bufHdr); /* Skip invalid buffers */ if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID)) { saved_buffers[num_buffers].database = bufHdr->tag.rnode.dbNode; saved_buffers[num_buffers].filenode = bufHdr->tag.rnode.relNode; saved_buffers[num_buffers].forknum = bufHdr->tag.forkNum; saved_buffers[num_buffers].blocknum = bufHdr->tag.blockNum; ++num_buffers; } UnlockBufHdr(bufHdr); } /* Unlock the buffer partitions in reverse order, to avoid a deadlock. */ for (i = NUM_BUFFER_PARTITIONS - 1; i >= 0; --i) LWLockRelease(FirstBufMappingLock + i); /* * Sort the list, so that we can optimize the storage of these buffers. 
* * The side-effect of this storage optimization is that when reading the * blocks back from relation forks, it leads to sequential reads, which * improve the restore speeds quite considerably as compared to random reads * from different blocks all over the data directory. */ pg_qsort(saved_buffers, num_buffers, sizeof(SavedBuffer), SavedBufferCmp); /* Connect to the database and start a transaction for database name lookups. */ BackgroundWorkerInitializeConnection(guc_default_database, NULL); SetCurrentStatementStartTimestamp(); StartTransactionCommand(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, "saving buffers"); for (i = 0; i < num_buffers; ++i) { int j; SavedBuffer *buf = &saved_buffers[i]; if (i == 0) { /* * Special case for global objects. The sort brings them to the * front of the list. */ /* Make sure the first buffer we save belongs to global object. */ Assert(buf->database == InvalidOid); /* * Database number (and save-file name) 1 is reserverd for storing * list of buffers of global objects. */ database_counter = 1; savefile_path = getSavefileName(database_counter); file = fileOpen(savefile_path, PG_BINARY_W); writeDBName("", file, savefile_path); prev_database = buf->database; } if (buf->database != prev_database) { char *dbname; /* * We are beginning to process a different database than the * previous one; close the save-file of previous database, and open * a new one. 
*/ ++database_counter; dbname = get_database_name(buf->database); Assert(dbname != NULL); if (file != NULL) fileClose(file, savefile_path); savefile_path = getSavefileName(database_counter); file = fileOpen(savefile_path, PG_BINARY_W); writeDBName(dbname, file, savefile_path); pfree(dbname); /* Reset trackers appropriately */ prev_database = buf->database; prev_filenode = InvalidOid; prev_forknum = InvalidForkNumber; prev_blocknum = InvalidBlockNumber; range_counter = 0; } if (buf->filenode != prev_filenode) { /* We're beginning to process a new relation; emit a record for it. */ fileWrite("r", 1, file, savefile_path); fileWrite(&(buf->filenode), sizeof(Oid), file, savefile_path); /* Reset trackers appropriately */ prev_filenode = buf->filenode; prev_forknum = InvalidForkNumber; prev_blocknum = InvalidBlockNumber; range_counter = 0; } if (buf->forknum != prev_forknum) { /* * We're beginning to process a new fork of this relation; add a * record for it. */ fileWrite("f", 1, file, savefile_path); fileWrite(&(buf->forknum), sizeof(ForkNumber), file, savefile_path); /* Reset trackers appropriately */ prev_forknum = buf->forknum; prev_blocknum = InvalidBlockNumber; range_counter = 0; } ereport(log_level, (errmsg("writer: writing block db %d filenode %d forknum %d blocknum %d", database_counter, prev_filenode, prev_forknum, buf->blocknum))); fileWrite("b", 1, file, savefile_path); fileWrite(&(buf->blocknum), sizeof(BlockNumber), file, savefile_path); prev_blocknum = buf->blocknum; /* * If a continuous range of blocks follows this block, then emit one * entry for the range, instead of one for each block. 
*/ range_counter = 0; for ( j = i+1; j < num_buffers; ++j) { SavedBuffer *tmp = &saved_buffers[j]; if (tmp->database == prev_database && tmp->filenode == prev_filenode && tmp->forknum == prev_forknum && tmp->blocknum == (prev_blocknum + range_counter + 1)) { ++range_counter; } } if (range_counter != 0) { ereport(log_level, (errmsg("writer: writing range db %d filenode %d forknum %d blocknum %d range %d", database_counter, prev_filenode, prev_forknum, prev_blocknum, range_counter))); fileWrite("N", 1, file, savefile_path); fileWrite(&range_counter, sizeof(range_counter), file, savefile_path); i += range_counter; } } ereport(LOG, (errmsg("Buffer Saver: saved metadata of %d blocks", num_buffers))); Assert(file != NULL); fileClose(file, savefile_path); pfree(saved_buffers); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_activity(STATE_IDLE, NULL); }
static void ReadBlocks(int filenum) { FILE *file; char record_type; char *dbname; Oid record_filenode; ForkNumber record_forknum; BlockNumber record_blocknum; BlockNumber record_range; int log_level = DEBUG3; Oid relOid = InvalidOid; Relation rel = NULL; bool skip_relation = false; bool skip_fork = false; bool skip_block = false; BlockNumber nblocks = 0; BlockNumber blocks_restored = 0; const char *filepath; /* * If this condition changes, then this code, and the code in the writer * will need to be changed; especially the format specifiers in log and * error messages. */ StaticAssertStmt(MaxBlockNumber == 0xFFFFFFFE, "Code may need review."); filepath = getSavefileName(filenum); file = fileOpen(filepath, PG_BINARY_R); dbname = readDBName(file, filepath); /* * When restoring global objects, the dbname is zero-length string, and non- * zero length otherwise. And filenum is never expected to be smaller than 1. */ Assert(filenum >= 1); Assert(filenum == 1 ? strlen(dbname) == 0 : strlen(dbname) > 0); /* To restore the global objects, use default database */ BackgroundWorkerInitializeConnection(filenum == 1 ? guc_default_database : dbname, NULL); SetCurrentStatementStartTimestamp(); StartTransactionCommand(); SPI_connect(); PushActiveSnapshot(GetTransactionSnapshot()); pgstat_report_activity(STATE_RUNNING, "restoring buffers"); /* * Note that in case of a read error, we will leak relcache entry that we may * currently have open. In case of EOF, we close the relation after the loop. */ while (fileRead(&record_type, 1, file, true, filepath)) { /* * If we want to process the signals, this seems to be the best place * to do it. Generally the backends refrain from processing config file * while in transaction, but that's more for the fear of allowing GUC * changes to affect expression evaluation, causing different results * for the same expression in a transaction. Since this worker is not * processing any queries, it is okay to process the config file here. 
* * Even though it's okay to process SIGHUP here, doing so doesn't add * any value. The only reason we might want to process config file here * would be to allow the user to interrupt the BlockReader's operation * by changing this extenstion's GUC parameter. But the user can do that * anyway, using SIGTERM or pg_terminate_backend(). */ /* Stop processing the save-file if the Postmaster wants us to die. */ if (got_sigterm) break; ereport(log_level, (errmsg("record type %x - %c", record_type, record_type))); switch (record_type) { case 'r': { /* Close the previous relation, if any. */ if (rel) { relation_close(rel, AccessShareLock); rel = NULL; } record_forknum = InvalidForkNumber; record_blocknum = InvalidBlockNumber; nblocks = 0; fileRead(&record_filenode, sizeof(Oid), file, false, filepath); relOid = GetRelOid(record_filenode); ereport(log_level, (errmsg("processing filenode %u, relation %u", record_filenode, relOid))); /* * If the relation has been rewritten/dropped since we saved it, * just skip it and process the next relation. 
*/ if (relOid == InvalidOid) skip_relation = true; else { skip_relation = false; /* Open the relation */ rel = relation_open(relOid, AccessShareLock); RelationOpenSmgr(rel); } } break; case 'f': { record_blocknum = InvalidBlockNumber; nblocks = 0; fileRead(&record_forknum, sizeof(ForkNumber), file, false, filepath); if (skip_relation) continue; if (rel == NULL) ereport(ERROR, (errmsg("found a fork record without a preceeding relation record"))); ereport(log_level, (errmsg("processing fork %d", record_forknum))); if (!smgrexists(rel->rd_smgr, record_forknum)) skip_fork = true; else { skip_fork = false; nblocks = RelationGetNumberOfBlocksInFork(rel, record_forknum); } } break; case 'b': { if (record_forknum == InvalidForkNumber) ereport(ERROR, (errmsg("found a block record without a preceeding fork record"))); fileRead(&record_blocknum, sizeof(BlockNumber), file, false, filepath); if (skip_relation || skip_fork) continue; /* * Don't try to read past the file; the file may have been shrunk * by a vaccum/truncate operation. 
*/ if (record_blocknum >= nblocks) { ereport(log_level, (errmsg("reader %d skipping block filenode %u forknum %d blocknum %u", filenum, record_filenode, record_forknum, record_blocknum))); skip_block = true; continue; } else { Buffer buf; skip_block = false; ereport(log_level, (errmsg("reader %d reading block filenode %u forknum %d blocknum %u", filenum, record_filenode, record_forknum, record_blocknum))); buf = ReadBufferExtended(rel, record_forknum, record_blocknum, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_restored; } } break; case 'N': { BlockNumber block; Assert(record_blocknum != InvalidBlockNumber); if (record_blocknum == InvalidBlockNumber) ereport(ERROR, (errmsg("found a block range record without a preceeding block record"))); fileRead(&record_range, sizeof(int), file, false, filepath); if (skip_relation || skip_fork || skip_block) continue; ereport(log_level, (errmsg("reader %d reading range filenode %u forknum %d blocknum %u range %u", filenum, record_filenode, record_forknum, record_blocknum, record_range))); for (block = record_blocknum + 1; block <= (record_blocknum + record_range); ++block) { Buffer buf; /* * Don't try to read past the file; the file may have been * shrunk by a vaccum operation. 
*/ if (block >= nblocks) { ereport(log_level, (errmsg("reader %d skipping block range filenode %u forknum %d start %u end %u", filenum, record_filenode, record_forknum, block, record_blocknum + record_range))); break; } buf = ReadBufferExtended(rel, record_forknum, block, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_restored; } } break; default: { ereport(ERROR, (errmsg("found unexpected save-file marker %x - %c)", record_type, record_type))); Assert(false); } break; } } if (rel) relation_close(rel, AccessShareLock); ereport(LOG, (errmsg("Block Reader %d: restored %u blocks", filenum, blocks_restored))); SPI_finish(); PopActiveSnapshot(); CommitTransactionCommand(); pgstat_report_activity(STATE_IDLE, NULL); fileClose(file, filepath); /* Remove the save-file */ if (remove(filepath) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("error removing file \"%s\" : %m", filepath))); }