/** * @fn Datum repack_index_swap(PG_FUNCTION_ARGS) * @brief Swap out an original index on a table with the newly-created one. * * repack_index_swap(index) * * @param index Oid of the *original* index. * @retval void */ Datum repack_index_swap(PG_FUNCTION_ARGS) { Oid orig_idx_oid = PG_GETARG_OID(0); Oid repacked_idx_oid; StringInfoData str; SPITupleTable *tuptable; TupleDesc desc; HeapTuple tuple; /* authority check */ must_be_superuser("repack_index_swap"); /* connect to SPI manager */ repack_init(); initStringInfo(&str); /* Find the OID of our new index. */ appendStringInfo(&str, "SELECT oid FROM pg_class " "WHERE relname = 'index_%u' AND relkind = 'i'", orig_idx_oid); execute(SPI_OK_SELECT, str.data); if (SPI_processed != 1) elog(ERROR, "Could not find index 'index_%u', found " UINT64_FORMAT " matches", orig_idx_oid, (uint64) SPI_processed); tuptable = SPI_tuptable; desc = tuptable->tupdesc; tuple = tuptable->vals[0]; repacked_idx_oid = getoid(tuple, desc, 1); swap_heap_or_index_files(orig_idx_oid, repacked_idx_oid); SPI_finish(); PG_RETURN_VOID(); }
/*
 * Build and send the "Z" datagram for `user`.
 *
 * The datagram consists of header("Z"), the mud name and the user's
 * object id, separated by TAB.
 */
void remove_user(object user) {
    int id;

    /* get number following # in file_name(user) */
    id = getoid(user);

    send_data(header("Z") + TAB + mudname + TAB + id);
}
/*
 * Build and send the "A" datagram for `user`, but only when the user's
 * object id falls into partition `which` (id % PARTITIONS == which).
 *
 * Does nothing when `user` is 0.  The datagram carries, TAB-separated:
 * header("A"), mud name, object id, login time, GENERATION and user name.
 */
void add_user(object user, int which) {
    string user_name;
    int id, logged_in_at;

    if (!user) {
        return;
    }

    id = getoid(user);

    /* refresh approx. 1/PARTITIONS of list each time */
    if ((id % PARTITIONS) == which) {
        logged_in_at = (int)user->QUERY_LOGIN_TIME;
        user_name = (string)user->QUERY_NAME;

        send_data(header("A") + TAB + mudname + TAB + id + TAB + logged_in_at
                  + TAB + GENERATION + TAB + user_name);
    }
}
/** * @fn Datum reorg_swap(PG_FUNCTION_ARGS) * @brief Swapping relfilenode of tables and relation ids of toast tables * and toast indexes. * * reorg_swap(oid, relname) * * TODO: remove useless CommandCounterIncrement(). * * @param oid Oid of table of target. * @retval None. */ Datum reorg_swap(PG_FUNCTION_ARGS) { Oid oid = PG_GETARG_OID(0); const char *relname = get_quoted_relname(oid); const char *nspname = get_quoted_nspname(oid); Oid argtypes[1] = { OIDOID }; bool nulls[1] = { 0 }; Datum values[1]; SPITupleTable *tuptable; TupleDesc desc; HeapTuple tuple; uint32 records; uint32 i; Oid reltoastrelid1; Oid reltoastidxid1; Oid oid2; Oid reltoastrelid2; Oid reltoastidxid2; Oid owner1; Oid owner2; /* authority check */ must_be_superuser("reorg_swap"); /* connect to SPI manager */ reorg_init(); /* swap relfilenode and dependencies for tables. */ values[0] = ObjectIdGetDatum(oid); execute_with_args(SPI_OK_SELECT, "SELECT X.reltoastrelid, TX.reltoastidxid, X.relowner," " Y.oid, Y.reltoastrelid, TY.reltoastidxid, Y.relowner" " FROM pg_catalog.pg_class X LEFT JOIN pg_catalog.pg_class TX" " ON X.reltoastrelid = TX.oid," " pg_catalog.pg_class Y LEFT JOIN pg_catalog.pg_class TY" " ON Y.reltoastrelid = TY.oid" " WHERE X.oid = $1" " AND Y.oid = ('reorg.table_' || X.oid)::regclass", 1, argtypes, values, nulls); tuptable = SPI_tuptable; desc = tuptable->tupdesc; records = SPI_processed; if (records == 0) elog(ERROR, "reorg_swap : no swap target"); tuple = tuptable->vals[0]; reltoastrelid1 = getoid(tuple, desc, 1); reltoastidxid1 = getoid(tuple, desc, 2); owner1 = getoid(tuple, desc, 3); oid2 = getoid(tuple, desc, 4); reltoastrelid2 = getoid(tuple, desc, 5); reltoastidxid2 = getoid(tuple, desc, 6); owner2 = getoid(tuple, desc, 7); /* change owner of new relation to original owner */ if (owner1 != owner2) { ATExecChangeOwner(oid2, owner1, true, AccessExclusiveLock); CommandCounterIncrement(); } /* swap tables. 
*/ swap_heap_or_index_files(oid, oid2); CommandCounterIncrement(); /* swap indexes. */ values[0] = ObjectIdGetDatum(oid); execute_with_args(SPI_OK_SELECT, "SELECT X.oid, Y.oid" " FROM pg_catalog.pg_index I," " pg_catalog.pg_class X," " pg_catalog.pg_class Y" " WHERE I.indrelid = $1" " AND I.indexrelid = X.oid" " AND I.indisvalid" " AND Y.oid = ('reorg.index_' || X.oid)::regclass", 1, argtypes, values, nulls); tuptable = SPI_tuptable; desc = tuptable->tupdesc; records = SPI_processed; for (i = 0; i < records; i++) { Oid idx1, idx2; tuple = tuptable->vals[i]; idx1 = getoid(tuple, desc, 1); idx2 = getoid(tuple, desc, 2); swap_heap_or_index_files(idx1, idx2); CommandCounterIncrement(); } /* swap names for toast tables and toast indexes */ if (reltoastrelid1 == InvalidOid) { if (reltoastidxid1 != InvalidOid || reltoastrelid2 != InvalidOid || reltoastidxid2 != InvalidOid) elog(ERROR, "reorg_swap : unexpected toast relations (T1=%u, I1=%u, T2=%u, I2=%u", reltoastrelid1, reltoastidxid1, reltoastrelid2, reltoastidxid2); /* do nothing */ } else if (reltoastrelid2 == InvalidOid) { char name[NAMEDATALEN]; if (reltoastidxid1 == InvalidOid || reltoastidxid2 != InvalidOid) elog(ERROR, "reorg_swap : unexpected toast relations (T1=%u, I1=%u, T2=%u, I2=%u", reltoastrelid1, reltoastidxid1, reltoastrelid2, reltoastidxid2); /* rename X to Y */ snprintf(name, NAMEDATALEN, "pg_toast_%u", oid2); RENAME_REL(reltoastrelid1, name); snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid2); RENAME_REL(reltoastidxid1, name); CommandCounterIncrement(); } else if (reltoastrelid1 != InvalidOid) { char name[NAMEDATALEN]; int pid = getpid(); /* rename X to TEMP */ snprintf(name, NAMEDATALEN, "pg_toast_pid%d", pid); RENAME_REL(reltoastrelid1, name); snprintf(name, NAMEDATALEN, "pg_toast_pid%d_index", pid); RENAME_REL(reltoastidxid1, name); CommandCounterIncrement(); /* rename Y to X */ snprintf(name, NAMEDATALEN, "pg_toast_%u", oid); RENAME_REL(reltoastrelid2, name); snprintf(name, NAMEDATALEN, 
"pg_toast_%u_index", oid); RENAME_REL(reltoastidxid2, name); CommandCounterIncrement(); /* rename TEMP to Y */ snprintf(name, NAMEDATALEN, "pg_toast_%u", oid2); RENAME_REL(reltoastrelid1, name); snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid2); RENAME_REL(reltoastidxid1, name); CommandCounterIncrement(); } /* drop reorg trigger */ execute_with_format( SPI_OK_UTILITY, "DROP TRIGGER IF EXISTS z_reorg_trigger ON %s.%s CASCADE", nspname, relname); SPI_finish(); PG_RETURN_VOID(); }
/* * Re-organize one table. */ static void reorg_one_table(const reorg_table *table, const char *orderby) { PGresult *res; const char *params[1]; int num; int i; int num_waiting = 0; char *vxid; char buffer[12]; StringInfoData sql; initStringInfo(&sql); elog(DEBUG2, "---- reorg_one_table ----"); elog(DEBUG2, "target_name : %s", table->target_name); elog(DEBUG2, "target_oid : %u", table->target_oid); elog(DEBUG2, "target_toast : %u", table->target_toast); elog(DEBUG2, "target_tidx : %u", table->target_tidx); elog(DEBUG2, "pkid : %u", table->pkid); elog(DEBUG2, "ckid : %u", table->ckid); elog(DEBUG2, "create_pktype : %s", table->create_pktype); elog(DEBUG2, "create_log : %s", table->create_log); elog(DEBUG2, "create_trigger : %s", table->create_trigger); elog(DEBUG2, "alter_table : %s", table->alter_table); elog(DEBUG2, "create_table : %s", table->create_table); elog(DEBUG2, "drop_columns : %s", table->drop_columns ? table->drop_columns : "(skipped)"); elog(DEBUG2, "delete_log : %s", table->delete_log); elog(DEBUG2, "lock_table : %s", table->lock_table); elog(DEBUG2, "sql_peek : %s", table->sql_peek); elog(DEBUG2, "sql_insert : %s", table->sql_insert); elog(DEBUG2, "sql_delete : %s", table->sql_delete); elog(DEBUG2, "sql_update : %s", table->sql_update); elog(DEBUG2, "sql_pop : %s", table->sql_pop); /* * 1. Setup workspaces and a trigger. */ elog(DEBUG2, "---- setup ----"); lock_exclusive(utoa(table->target_oid, buffer), table->lock_table); /* * Check z_reorg_trigger is the trigger executed at last so that * other before triggers cannot modify triggered tuples. 
*/ params[0] = utoa(table->target_oid, buffer); res = execute("SELECT reorg.conflicted_triggers($1)", 1, params); if (PQntuples(res) > 0) ereport(ERROR, (errcode(E_PG_COMMAND), errmsg("trigger %s conflicted for %s", PQgetvalue(res, 0, 0), table->target_name))); PQclear(res); command(table->create_pktype, 0, NULL); command(table->create_log, 0, NULL); command(table->create_trigger, 0, NULL); command(table->alter_table, 0, NULL); printfStringInfo(&sql, "SELECT reorg.disable_autovacuum('reorg.log_%u')", table->target_oid); command(sql.data, 0, NULL); command("COMMIT", 0, NULL); /* * Register the table to be dropped on error. We use pktype as * an advisory lock. The registration should be done after * the first command succeeds. */ pgut_atexit_push(&reorg_cleanup, (void *) table); /* * 2. Copy tuples into temp table. */ elog(DEBUG2, "---- copy tuples ----"); command("BEGIN ISOLATION LEVEL SERIALIZABLE", 0, NULL); /* SET work_mem = maintenance_work_mem */ command("SELECT set_config('work_mem', current_setting('maintenance_work_mem'), true)", 0, NULL); if (orderby && !orderby[0]) command("SET LOCAL synchronize_seqscans = off", 0, NULL); res = execute(SQL_XID_SNAPSHOT, 0, NULL); vxid = strdup(PQgetvalue(res, 0, 0)); PQclear(res); command(table->delete_log, 0, NULL); command(table->create_table, 0, NULL); printfStringInfo(&sql, "SELECT reorg.disable_autovacuum('reorg.table_%u')", table->target_oid); if (table->drop_columns) command(table->drop_columns, 0, NULL); command(sql.data, 0, NULL); command("COMMIT", 0, NULL); /* * 3. Create indexes on temp table. 
*/ elog(DEBUG2, "---- create indexes ----"); params[0] = utoa(table->target_oid, buffer); res = execute("SELECT indexrelid," " reorg.reorg_indexdef(indexrelid, indrelid)," " indisvalid," " pg_get_indexdef(indexrelid)" " FROM pg_index WHERE indrelid = $1", 1, params); num = PQntuples(res); for (i = 0; i < num; i++) { reorg_index index; int c = 0; const char *isvalid; const char *indexdef; index.target_oid = getoid(res, i, c++); index.create_index = getstr(res, i, c++); isvalid = getstr(res, i, c++); indexdef = getstr(res, i, c++); if (isvalid && isvalid[0] == 'f') { elog(WARNING, "skipping invalid index: %s", indexdef); continue; } elog(DEBUG2, "[%d]", i); elog(DEBUG2, "target_oid : %u", index.target_oid); elog(DEBUG2, "create_index : %s", index.create_index); /* * NOTE: If we want to create multiple indexes in parallel, * we need to call create_index in multiple connections. */ command(index.create_index, 0, NULL); } PQclear(res); /* * 4. Apply log to temp table until no tuples are left in the log * and all of the old transactions are finished. */ for (;;) { num = apply_log(table, APPLY_COUNT); if (num > 0) continue; /* there might be still some tuples, repeat. */ /* old transactions still alive ? */ params[0] = vxid; res = execute(SQL_XID_ALIVE, 1, params); num = PQntuples(res); if (num > 0) { /* Wait for old transactions. * Only display the message below when the number of * transactions we are waiting on changes (presumably, * num_waiting should only go down), so as not to * be too noisy. */ if (num != num_waiting) { elog(NOTICE, "Waiting for %d transactions to finish. First PID: %s", num, PQgetvalue(res, 0, 0)); num_waiting = num; } PQclear(res); sleep(1); continue; } else { /* All old transactions are finished; * go to next step. */ PQclear(res); break; } } /* * 5. Swap. 
*/ elog(DEBUG2, "---- swap ----"); lock_exclusive(utoa(table->target_oid, buffer), table->lock_table); apply_log(table, 0); params[0] = utoa(table->target_oid, buffer); command("SELECT reorg.reorg_swap($1)", 1, params); command("COMMIT", 0, NULL); /* * 6. Drop. */ elog(DEBUG2, "---- drop ----"); command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL); params[0] = utoa(table->target_oid, buffer); command("SELECT reorg.reorg_drop($1)", 1, params); command("COMMIT", 0, NULL); pgut_atexit_pop(&reorg_cleanup, (void *) table); free(vxid); /* * 7. Analyze. * Note that cleanup hook has been already uninstalled here because analyze * is not an important operation; No clean up even if failed. */ if (analyze) { elog(DEBUG2, "---- analyze ----"); command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL); printfStringInfo(&sql, "ANALYZE %s", table->target_name); command(sql.data, 0, NULL); command("COMMIT", 0, NULL); } termStringInfo(&sql); }
/* * Call reorg_one_table for the target table or each table in a database. */ static bool reorg_one_database(const char *orderby, const char *table) { bool ret = true; PGresult *res; int i; int num; StringInfoData sql; initStringInfo(&sql); reconnect(ERROR); /* Disable statement timeout. */ command("SET statement_timeout = 0", 0, NULL); /* Restrict search_path to system catalog. */ command("SET search_path = pg_catalog, pg_temp, public", 0, NULL); /* To avoid annoying "create implicit ..." messages. */ command("SET client_min_messages = warning", 0, NULL); /* acquire target tables */ appendStringInfoString(&sql, "SELECT * FROM reorg.tables WHERE "); if (table) { appendStringInfoString(&sql, "relid = $1::regclass"); res = execute_elevel(sql.data, 1, &table, DEBUG2); } else { appendStringInfoString(&sql, "pkid IS NOT NULL"); if (!orderby) appendStringInfoString(&sql, " AND ckid IS NOT NULL"); res = execute_elevel(sql.data, 0, NULL, DEBUG2); } if (PQresultStatus(res) != PGRES_TUPLES_OK) { if (sqlstate_equals(res, SQLSTATE_INVALID_SCHEMA_NAME)) { /* Schema reorg does not exist. Skip the database. 
*/ ret = false; goto cleanup; } else { /* exit otherwise */ printf("%s", PQerrorMessage(connection)); PQclear(res); exit(1); } } num = PQntuples(res); for (i = 0; i < num; i++) { reorg_table table; const char *create_table; const char *ckey; int c = 0; table.target_name = getstr(res, i, c++); table.target_oid = getoid(res, i, c++); table.target_toast = getoid(res, i, c++); table.target_tidx = getoid(res, i, c++); table.pkid = getoid(res, i, c++); table.ckid = getoid(res, i, c++); if (table.pkid == 0) ereport(ERROR, (errcode(E_PG_COMMAND), errmsg("relation \"%s\" must have a primary key or not-null unique keys", table.target_name))); table.create_pktype = getstr(res, i, c++); table.create_log = getstr(res, i, c++); table.create_trigger = getstr(res, i, c++); table.alter_table = getstr(res, i, c++); create_table = getstr(res, i, c++); table.drop_columns = getstr(res, i, c++); table.delete_log = getstr(res, i, c++); table.lock_table = getstr(res, i, c++); ckey = getstr(res, i, c++); resetStringInfo(&sql); if (!orderby) { /* CLUSTER mode */ if (ckey == NULL) ereport(ERROR, (errcode(E_PG_COMMAND), errmsg("relation \"%s\" has no cluster key", table.target_name))); appendStringInfo(&sql, "%s ORDER BY %s", create_table, ckey); table.create_table = sql.data; } else if (!orderby[0]) { /* VACUUM FULL mode */ table.create_table = create_table; } else { /* User specified ORDER BY */ appendStringInfo(&sql, "%s ORDER BY %s", create_table, orderby); table.create_table = sql.data; } table.sql_peek = getstr(res, i, c++); table.sql_insert = getstr(res, i, c++); table.sql_delete = getstr(res, i, c++); table.sql_update = getstr(res, i, c++); table.sql_pop = getstr(res, i, c++); reorg_one_table(&table, orderby); } cleanup: PQclear(res); disconnect(); termStringInfo(&sql); return ret; }