Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * autovacuum.c
4 : *
5 : * PostgreSQL Integrated Autovacuum Daemon
6 : *
7 : * The autovacuum system is structured in two different kinds of processes: the
8 : * autovacuum launcher and the autovacuum worker. The launcher is an
9 : * always-running process, started by the postmaster when the autovacuum GUC
10 : * parameter is set. The launcher schedules autovacuum workers to be started
11 : * when appropriate. The workers are the processes which execute the actual
12 : * vacuuming; they connect to a database as determined in the launcher, and
13 : * once connected they examine the catalogs to select the tables to vacuum.
14 : *
15 : * The autovacuum launcher cannot start the worker processes by itself,
16 : * because doing so would cause robustness issues (namely, failure to shut
17 : * them down on exceptional conditions, and also, since the launcher is
18 : * connected to shared memory and is thus subject to corruption there, it is
19 : * not as robust as the postmaster). So it leaves that task to the postmaster.
20 : *
21 : * There is an autovacuum shared memory area, where the launcher stores
22 : * information about the database it wants vacuumed. When it wants a new
23 : * worker to start, it sets a flag in shared memory and sends a signal to the
24 : * postmaster. The postmaster then knows only that it must start a worker;
25 : * so it forks a new child, which turns into a worker. This new process
26 : * connects to shared memory, and there it can inspect the information that the
27 : * launcher has set up.
28 : *
29 : * If the fork() call fails in the postmaster, it sets a flag in the shared
30 : * memory area, and sends a signal to the launcher. The launcher, upon
31 : * noticing the flag, can try starting the worker again by resending the
32 : * signal. Note that the failure can only be transient (fork failure due to
33 : * high load, memory pressure, too many processes, etc); more permanent
34 : * problems, like failure to connect to a database, are detected later in the
35 : * worker and dealt with just by having the worker exit normally. The launcher
36 : * will launch a new worker again later, per schedule.
37 : *
38 : * When the worker is done vacuuming it sends SIGUSR2 to the launcher. The
39 : * launcher then wakes up and is able to launch another worker, if the schedule
40 : * is so tight that a new worker is needed immediately. At this time the
41 : * launcher can also balance the settings for the various remaining workers'
42 : * cost-based vacuum delay feature.
43 : *
44 : * Note that there can be more than one worker in a database concurrently.
45 : * They will store the table they are currently vacuuming in shared memory, so
46 : * that other workers avoid being blocked waiting for the vacuum lock for that
47 : * table. They will also reload the pgstats data just before vacuuming each
48 : * table, to avoid vacuuming a table that was just finished being vacuumed by
49 : * another worker and thus is no longer noted in shared memory. However,
50 : * there is a window (caused by pgstat delay) during which a worker may choose a
51 : * table that was already vacuumed; this is a bug in the current design.
52 : *
53 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
54 : * Portions Copyright (c) 1994, Regents of the University of California
55 : *
56 : *
57 : * IDENTIFICATION
58 : * src/backend/postmaster/autovacuum.c
59 : *
60 : *-------------------------------------------------------------------------
61 : */
62 : #include "postgres.h"
63 :
64 : #include <signal.h>
65 : #include <sys/time.h>
66 : #include <unistd.h>
67 :
68 : #include "access/heapam.h"
69 : #include "access/htup_details.h"
70 : #include "access/multixact.h"
71 : #include "access/reloptions.h"
72 : #include "access/transam.h"
73 : #include "access/xact.h"
74 : #include "catalog/dependency.h"
75 : #include "catalog/namespace.h"
76 : #include "catalog/pg_database.h"
77 : #include "commands/dbcommands.h"
78 : #include "commands/vacuum.h"
79 : #include "lib/ilist.h"
80 : #include "libpq/pqsignal.h"
81 : #include "miscadmin.h"
82 : #include "pgstat.h"
83 : #include "postmaster/autovacuum.h"
84 : #include "postmaster/fork_process.h"
85 : #include "postmaster/postmaster.h"
86 : #include "storage/bufmgr.h"
87 : #include "storage/ipc.h"
88 : #include "storage/latch.h"
89 : #include "storage/lmgr.h"
90 : #include "storage/pmsignal.h"
91 : #include "storage/proc.h"
92 : #include "storage/procsignal.h"
93 : #include "storage/sinvaladt.h"
94 : #include "storage/smgr.h"
95 : #include "tcop/tcopprot.h"
96 : #include "utils/dsa.h"
97 : #include "utils/fmgroids.h"
98 : #include "utils/fmgrprotos.h"
99 : #include "utils/lsyscache.h"
100 : #include "utils/memutils.h"
101 : #include "utils/ps_status.h"
102 : #include "utils/rel.h"
103 : #include "utils/snapmgr.h"
104 : #include "utils/syscache.h"
105 : #include "utils/timeout.h"
106 : #include "utils/timestamp.h"
107 : #include "utils/tqual.h"
108 :
109 :
110 : /*
111 : * GUC parameters (configuration settings consulted by the autovacuum code)
112 : */
113 : bool autovacuum_start_daemon = false;
114 : int autovacuum_max_workers; /* sizes the shared WorkerInfo array */
115 : int autovacuum_work_mem = -1;
116 : int autovacuum_naptime; /* launcher nap time; used as a count of seconds */
117 : int autovacuum_vac_thresh;
118 : double autovacuum_vac_scale;
119 : int autovacuum_anl_thresh;
120 : double autovacuum_anl_scale;
121 : int autovacuum_freeze_max_age;
122 : int autovacuum_multixact_freeze_max_age;
123 :
124 : int autovacuum_vac_cost_delay;
125 : int autovacuum_vac_cost_limit;
126 :
127 : int Log_autovacuum_min_duration = -1;
128 :
129 : /* how long to keep pgstat data in the launcher, in milliseconds */
130 : #define STATS_READ_DELAY 1000
131 :
132 : /* lower and upper bounds that launcher_determine_sleep applies to the launcher's sleep time */
133 : #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
134 : #define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */
135 :
136 : /* Flags to tell if we are in an autovacuum process */
137 : static bool am_autovacuum_launcher = false;
138 : static bool am_autovacuum_worker = false;
139 :
140 : /* Flags set by signal handlers; the launcher's main loop polls and clears them */
141 : static volatile sig_atomic_t got_SIGHUP = false;
142 : static volatile sig_atomic_t got_SIGUSR2 = false;
143 : static volatile sig_atomic_t got_SIGTERM = false;
144 :
145 : /* Comparison points for determining whether freeze_max_age is exceeded */
146 : static TransactionId recentXid;
147 : static MultiXactId recentMulti;
148 :
149 : /* Default freeze ages to use for autovacuum (varies by database) */
150 : static int default_freeze_min_age;
151 : static int default_freeze_table_age;
152 : static int default_multixact_freeze_min_age;
153 : static int default_multixact_freeze_table_age;
154 :
155 : /* Memory context for long-lived data; the launcher creates it under TopMemoryContext and resets it after an error */
156 : static MemoryContext AutovacMemCxt;
157 :
158 : /* struct to keep track of databases in launcher; elements are linked into DatabaseList */
159 : typedef struct avl_dbase
160 : {
161 : Oid adl_datid; /* hash key -- must be first */
162 : TimestampTz adl_next_worker; /* time at which the next worker for this database is due */
163 : int adl_score; /* NOTE(review): presumably an ordering key used when the list is rebuilt -- confirm */
164 : dlist_node adl_node; /* link in DatabaseList */
165 : } avl_dbase;
166 :
167 : /* struct to keep track of databases in worker */
168 : typedef struct avw_dbase
169 : {
170 : Oid adw_datid; /* presumably the database's OID -- confirm against users */
171 : char *adw_name; /* presumably the database's name -- confirm against users */
172 : TransactionId adw_frozenxid;
173 : MultiXactId adw_minmulti;
174 : PgStat_StatDBEntry *adw_entry; /* statistics entry for the database, judging by the type; TODO confirm whether it may be NULL */
175 : } avw_dbase;
176 :
177 : /* struct to keep track of tables to vacuum and/or analyze, in 1st pass */
178 : typedef struct av_relation
179 : {
180 : Oid ar_toastrelid; /* hash key - must be first */
181 : Oid ar_relid;
182 : bool ar_hasrelopts;
183 : AutoVacOpts ar_reloptions; /* copy of AutoVacOpts from the main table's
184 : * reloptions; held by value, so never NULL -- presumably meaningful only when ar_hasrelopts is true (confirm) */
185 : } av_relation;
186 :
187 : /* struct to keep track of tables to vacuum and/or analyze, after rechecking */
188 : typedef struct autovac_table
189 : {
190 : Oid at_relid;
191 : int at_vacoptions; /* bitmask of VacuumOption */
192 : VacuumParams at_params;
193 : int at_vacuum_cost_delay;
194 : int at_vacuum_cost_limit;
195 : bool at_dobalance;
196 : bool at_sharedrel;
197 : char *at_relname; /* NOTE(review): the three names below look like relation, schema and database names -- confirm */
198 : char *at_nspname;
199 : char *at_datname;
200 : } autovac_table;
201 :
202 : /*-------------
203 : * This struct holds information about a single worker's whereabouts. We keep
204 : * an array of these in shared memory, sized according to
205 : * autovacuum_max_workers.
206 : *
207 : * wi_links entry into free list or running list
208 : * wi_dboid OID of the database this worker is supposed to work on
209 : * wi_tableoid OID of the table currently being vacuumed, if any
210 : * wi_sharedrel flag indicating whether table is marked relisshared
211 : * wi_proc pointer to PGPROC of the running worker, NULL if not started
212 : * wi_launchtime Time at which this worker was launched; the launcher compares it with the current time to detect a worker that is taking too long to start
213 : * wi_cost_* Vacuum cost-based delay parameters current in this worker
214 : *
215 : * All fields are protected by AutovacuumLock, except for wi_tableoid which is
216 : * protected by AutovacuumScheduleLock (which is read-only for everyone except
217 : * that worker itself).
218 : *-------------
219 : */
220 : typedef struct WorkerInfoData
221 : {
222 : dlist_node wi_links;
223 : Oid wi_dboid;
224 : Oid wi_tableoid;
225 : PGPROC *wi_proc;
226 : TimestampTz wi_launchtime;
227 : bool wi_dobalance; /* not described in the header above; presumably "take part in cost balancing" -- confirm */
228 : bool wi_sharedrel;
229 : int wi_cost_delay;
230 : int wi_cost_limit;
231 : int wi_cost_limit_base;
232 : } WorkerInfoData;
233 :
234 : typedef struct WorkerInfoData *WorkerInfo;
235 :
236 : /*
237 : * Possible signals received by the launcher from remote processes. These are
238 : * stored atomically in shared memory so that other processes can set them
239 : * without locking. The launcher inspects and clears them after a SIGUSR2.
240 : */
241 : typedef enum
242 : {
243 : AutoVacForkFailed, /* failed trying to start a worker; the launcher re-sends the start request */
244 : AutoVacRebalance, /* rebalance the cost limits */
245 : AutoVacNumSignals /* must be last; used as the size of av_signal[] */
246 : } AutoVacuumSignal;
247 :
248 : /*
249 : * Element of the autovacuum workitem array, stored in AutoVacuumShmem->av_workItems. This
250 : * list is mostly protected by AutovacuumLock, except that if an item is
251 : * marked 'active' other processes must not modify the work-identifying
252 : * members.
253 : */
254 : typedef struct AutoVacuumWorkItem
255 : {
256 : AutoVacuumWorkItemType avw_type;
257 : bool avw_used; /* below data is valid */
258 : bool avw_active; /* being processed */
259 : Oid avw_database;
260 : Oid avw_relation;
261 : BlockNumber avw_blockNumber;
262 : } AutoVacuumWorkItem;
263 :
264 : #define NUM_WORKITEMS 256 /* number of slots in av_workItems */
265 :
266 : /*-------------
267 : * The main autovacuum shmem struct. In shared memory we store this main
268 : * struct and the array of WorkerInfo structs. This struct keeps:
269 : *
270 : * av_signal flags, indexed by AutoVacuumSignal, set by other processes to indicate various conditions
271 : * av_launcherpid the PID of the autovacuum launcher (the launcher resets it to 0 on normal exit)
272 : * av_freeWorkers the WorkerInfo freelist
273 : * av_runningWorkers the WorkerInfo non-free queue
274 : * av_startingWorker pointer to WorkerInfo currently being started (cleared by
275 : * the worker itself as soon as it's up and running, or by the launcher if the worker takes too long to start)
276 : * av_workItems work item array
277 : *
278 : * This struct is protected by AutovacuumLock, except for av_signal and parts
279 : * of the worker list (see above).
280 : *-------------
281 : */
282 : typedef struct
283 : {
284 : sig_atomic_t av_signal[AutoVacNumSignals];
285 : pid_t av_launcherpid;
286 : dlist_head av_freeWorkers;
287 : dlist_head av_runningWorkers;
288 : WorkerInfo av_startingWorker;
289 : AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
290 : } AutoVacuumShmemStruct;
291 :
292 : static AutoVacuumShmemStruct *AutoVacuumShmem; /* this process's pointer to the shared autovacuum area */
293 :
294 : /*
295 : * the database list (of avl_dbase elements) in the launcher, and the context
296 : * that contains it; the launcher takes the next database to process from the tail of the list
297 : */
298 : static dlist_head DatabaseList = DLIST_STATIC_INIT(DatabaseList);
299 : static MemoryContext DatabaseListCxt = NULL;
300 :
301 : /* Pointer to my own WorkerInfo, valid on each worker */
302 : static WorkerInfo MyWorkerInfo = NULL;
303 :
304 : /* PID of launcher, valid only in worker while shutting down */
305 : int AutovacuumLauncherPid = 0;
306 :
307 : #ifdef EXEC_BACKEND
308 : static pid_t avlauncher_forkexec(void);
309 : static pid_t avworker_forkexec(void);
310 : #endif
311 : NON_EXEC_STATIC void AutoVacWorkerMain(int argc, char *argv[]) pg_attribute_noreturn();
312 : NON_EXEC_STATIC void AutoVacLauncherMain(int argc, char *argv[]) pg_attribute_noreturn();
313 : /* forward declarations of file-local routines follow */
314 : static Oid do_start_worker(void);
315 : static void launcher_determine_sleep(bool canlaunch, bool recursing,
316 : struct timeval *nap);
317 : static void launch_worker(TimestampTz now);
318 : static List *get_database_list(void);
319 : static void rebuild_database_list(Oid newdb);
320 : static int db_comparator(const void *a, const void *b);
321 : static void autovac_balance_cost(void);
322 :
323 : static void do_autovacuum(void);
324 : static void FreeWorkerInfo(int code, Datum arg);
325 :
326 : static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
327 : TupleDesc pg_class_desc,
328 : int effective_multixact_freeze_max_age);
329 : static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
330 : Form_pg_class classForm,
331 : PgStat_StatTabEntry *tabentry,
332 : int effective_multixact_freeze_max_age,
333 : bool *dovacuum, bool *doanalyze, bool *wraparound);
334 :
335 : static void autovacuum_do_vac_analyze(autovac_table *tab,
336 : BufferAccessStrategy bstrategy);
337 : static AutoVacOpts *extract_autovac_opts(HeapTuple tup,
338 : TupleDesc pg_class_desc);
339 : static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
340 : PgStat_StatDBEntry *shared,
341 : PgStat_StatDBEntry *dbentry);
342 : static void perform_work_item(AutoVacuumWorkItem *workitem);
343 : static void autovac_report_activity(autovac_table *tab);
344 : static void autovac_report_workitem(AutoVacuumWorkItem *workitem,
345 : const char *nspname, const char *relname);
346 : static void av_sighup_handler(SIGNAL_ARGS);
347 : static void avl_sigusr2_handler(SIGNAL_ARGS);
348 : static void avl_sigterm_handler(SIGNAL_ARGS);
349 : static void autovac_refresh_stats(void);
350 :
351 :
352 :
353 : /********************************************************************
354 : * AUTOVACUUM LAUNCHER CODE
355 : ********************************************************************/
356 :
357 : #ifdef EXEC_BACKEND
/*
 * avlauncher_forkexec
 *		EXEC_BACKEND-only: build the argument vector for the autovacuum
 *		launcher and hand it to postmaster_forkexec.
 *
 * Returns whatever postmaster_forkexec returns.
 */
static pid_t
avlauncher_forkexec(void)
{
	char	   *argv_buf[10];
	int			nargs;

	argv_buf[0] = "postgres";
	argv_buf[1] = "--forkavlauncher";
	argv_buf[2] = NULL;			/* slot filled in by postmaster_forkexec */
	nargs = 3;

	/* terminate the vector; the terminator is not counted in nargs */
	argv_buf[nargs] = NULL;

	Assert(nargs < lengthof(argv_buf));

	return postmaster_forkexec(nargs, argv_buf);
}
378 :
379 : /*
380 : * We need this set from the outside, before InitProcess is called
381 : */
382 : void
383 : AutovacuumLauncherIAm(void)
384 : {
385 : am_autovacuum_launcher = true;
386 : }
387 : #endif
388 :
389 : /*
390 : * Main entry point for autovacuum launcher process, to be called from the
391 : * postmaster.
392 : */
393 : int
394 1 : StartAutoVacLauncher(void)
395 : {
396 : pid_t AutoVacPID;
397 :
398 : #ifdef EXEC_BACKEND
399 : switch ((AutoVacPID = avlauncher_forkexec()))
400 : #else
401 1 : switch ((AutoVacPID = fork_process()))
402 : #endif
403 : {
404 : case -1:
405 0 : ereport(LOG,
406 : (errmsg("could not fork autovacuum launcher process: %m")));
407 0 : return 0;
408 :
409 : #ifndef EXEC_BACKEND
410 : case 0:
411 : /* in postmaster child ... */
412 1 : InitPostmasterChild();
413 :
414 : /* Close the postmaster's sockets */
415 1 : ClosePostmasterPorts(false);
416 :
417 1 : AutoVacLauncherMain(0, NULL);
418 : break;
419 : #endif
420 : default:
421 1 : return (int) AutoVacPID;
422 : }
423 :
424 : /* shouldn't get here */
425 : return 0;
426 : }
427 :
428 : /*
429 : * Main loop for the autovacuum launcher process.
430 : */
431 : NON_EXEC_STATIC void
432 1 : AutoVacLauncherMain(int argc, char *argv[])
433 : {
434 : sigjmp_buf local_sigjmp_buf;
435 :
436 1 : am_autovacuum_launcher = true;
437 :
438 : /* Identify myself via ps */
439 1 : init_ps_display("autovacuum launcher process", "", "", "");
440 :
441 1 : ereport(DEBUG1,
442 : (errmsg("autovacuum launcher started")));
443 :
444 1 : if (PostAuthDelay)
445 0 : pg_usleep(PostAuthDelay * 1000000L);
446 :
447 1 : SetProcessingMode(InitProcessing);
448 :
449 : /*
450 : * Set up signal handlers. We operate on databases much like a regular
451 : * backend, so we use the same signal handling. See equivalent code in
452 : * tcop/postgres.c.
453 : */
454 1 : pqsignal(SIGHUP, av_sighup_handler);
455 1 : pqsignal(SIGINT, StatementCancelHandler);
456 1 : pqsignal(SIGTERM, avl_sigterm_handler);
457 :
458 1 : pqsignal(SIGQUIT, quickdie);
459 1 : InitializeTimeouts(); /* establishes SIGALRM handler */
460 :
461 1 : pqsignal(SIGPIPE, SIG_IGN);
462 1 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
463 1 : pqsignal(SIGUSR2, avl_sigusr2_handler);
464 1 : pqsignal(SIGFPE, FloatExceptionHandler);
465 1 : pqsignal(SIGCHLD, SIG_DFL);
466 :
467 : /* Early initialization */
468 1 : BaseInit();
469 :
470 : /*
471 : * Create a per-backend PGPROC struct in shared memory, except in the
472 : * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
473 : * this before we can use LWLocks (and in the EXEC_BACKEND case we already
474 : * had to do some stuff with LWLocks).
475 : */
476 : #ifndef EXEC_BACKEND
477 1 : InitProcess();
478 : #endif
479 :
480 1 : InitPostgres(NULL, InvalidOid, NULL, InvalidOid, NULL);
481 :
482 1 : SetProcessingMode(NormalProcessing);
483 :
484 : /*
485 : * Create a memory context that we will do all our work in. We do this so
486 : * that we can reset the context during error recovery and thereby avoid
487 : * possible memory leaks.
488 : */
489 1 : AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
490 : "Autovacuum Launcher",
491 : ALLOCSET_DEFAULT_SIZES);
492 1 : MemoryContextSwitchTo(AutovacMemCxt);
493 :
494 : /*
495 : * If an exception is encountered, processing resumes here.
496 : *
497 : * This code is a stripped down version of PostgresMain error recovery.
498 : */
499 1 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
500 : {
501 : /* since not using PG_TRY, must reset error stack by hand */
502 0 : error_context_stack = NULL;
503 :
504 : /* Prevents interrupts while cleaning up */
505 0 : HOLD_INTERRUPTS();
506 :
507 : /* Forget any pending QueryCancel or timeout request */
508 0 : disable_all_timeouts(false);
509 0 : QueryCancelPending = false; /* second to avoid race condition */
510 :
511 : /* Report the error to the server log */
512 0 : EmitErrorReport();
513 :
514 : /* Abort the current transaction in order to recover */
515 0 : AbortCurrentTransaction();
516 :
517 : /*
518 : * Release any other resources, for the case where we were not in a
519 : * transaction.
520 : */
521 0 : LWLockReleaseAll();
522 0 : pgstat_report_wait_end();
523 0 : AbortBufferIO();
524 0 : UnlockBuffers();
525 0 : if (CurrentResourceOwner)
526 : {
527 0 : ResourceOwnerRelease(CurrentResourceOwner,
528 : RESOURCE_RELEASE_BEFORE_LOCKS,
529 : false, true);
530 : /* we needn't bother with the other ResourceOwnerRelease phases */
531 : }
532 0 : AtEOXact_Buffers(false);
533 0 : AtEOXact_SMgr();
534 0 : AtEOXact_Files();
535 0 : AtEOXact_HashTables(false);
536 :
537 : /*
538 : * Now return to normal top-level context and clear ErrorContext for
539 : * next time.
540 : */
541 0 : MemoryContextSwitchTo(AutovacMemCxt);
542 0 : FlushErrorState();
543 :
544 : /* Flush any leaked data in the top-level context */
545 0 : MemoryContextResetAndDeleteChildren(AutovacMemCxt);
546 :
547 : /* don't leave dangling pointers to freed memory */
548 0 : DatabaseListCxt = NULL;
549 0 : dlist_init(&DatabaseList);
550 :
551 : /*
552 : * Make sure pgstat also considers our stat data as gone. Note: we
553 : * mustn't use autovac_refresh_stats here.
554 : */
555 0 : pgstat_clear_snapshot();
556 :
557 : /* Now we can allow interrupts again */
558 0 : RESUME_INTERRUPTS();
559 :
560 : /* if in shutdown mode, no need for anything further; just go away */
561 0 : if (got_SIGTERM)
562 0 : goto shutdown;
563 :
564 : /*
565 : * Sleep at least 1 second after any error. We don't want to be
566 : * filling the error logs as fast as we can.
567 : */
568 0 : pg_usleep(1000000L);
569 : }
570 :
571 : /* We can now handle ereport(ERROR) */
572 1 : PG_exception_stack = &local_sigjmp_buf;
573 :
574 : /* must unblock signals before calling rebuild_database_list */
575 1 : PG_SETMASK(&UnBlockSig);
576 :
577 : /*
578 : * Force zero_damaged_pages OFF in the autovac process, even if it is set
579 : * in postgresql.conf. We don't really want such a dangerous option being
580 : * applied non-interactively.
581 : */
582 1 : SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
583 :
584 : /*
585 : * Force settable timeouts off to avoid letting these settings prevent
586 : * regular maintenance from being executed.
587 : */
588 1 : SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
589 1 : SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
590 1 : SetConfigOption("idle_in_transaction_session_timeout", "0",
591 : PGC_SUSET, PGC_S_OVERRIDE);
592 :
593 : /*
594 : * Force default_transaction_isolation to READ COMMITTED. We don't want
595 : * to pay the overhead of serializable mode, nor add any risk of causing
596 : * deadlocks or delaying other transactions.
597 : */
598 1 : SetConfigOption("default_transaction_isolation", "read committed",
599 : PGC_SUSET, PGC_S_OVERRIDE);
600 :
601 : /*
602 : * In emergency mode, just start a worker (unless shutdown was requested)
603 : * and go away.
604 : */
605 1 : if (!AutoVacuumingActive())
606 : {
607 0 : if (!got_SIGTERM)
608 0 : do_start_worker();
609 0 : proc_exit(0); /* done */
610 : }
611 :
612 1 : AutoVacuumShmem->av_launcherpid = MyProcPid;
613 :
614 : /*
615 : * Create the initial database list. The invariant we want this list to
616 : * keep is that it's ordered by decreasing next_time. As soon as an entry
617 : * is updated to a higher time, it will be moved to the front (which is
618 : * correct because the only operation is to add autovacuum_naptime to the
619 : * entry, and time always increases).
620 : */
621 1 : rebuild_database_list(InvalidOid);
622 :
623 : /* loop until shutdown request */
624 90 : while (!got_SIGTERM)
625 : {
626 : struct timeval nap;
627 89 : TimestampTz current_time = 0;
628 : bool can_launch;
629 : int rc;
630 :
631 : /*
632 : * This loop is a bit different from the normal use of WaitLatch,
633 : * because we'd like to sleep before the first launch of a child
634 : * process. So it's WaitLatch, then ResetLatch, then check for
635 : * wakening conditions.
636 : */
637 :
638 89 : launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),
639 : false, &nap);
640 :
641 : /*
642 : * Wait until naptime expires or we get some type of signal (all the
643 : * signal handlers will wake us by calling SetLatch).
644 : */
645 89 : rc = WaitLatch(MyLatch,
646 : WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
647 89 : (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
648 : WAIT_EVENT_AUTOVACUUM_MAIN);
649 :
650 89 : ResetLatch(MyLatch);
651 :
652 : /* Process sinval catchup interrupts that happened while sleeping */
653 89 : ProcessCatchupInterrupt();
654 :
655 : /*
656 : * Emergency bailout if postmaster has died. This is to avoid the
657 : * necessity for manual cleanup of all postmaster children.
658 : */
659 89 : if (rc & WL_POSTMASTER_DEATH)
660 0 : proc_exit(1);
661 :
662 : /* the normal shutdown case */
663 89 : if (got_SIGTERM)
664 2 : break;
665 :
666 88 : if (got_SIGHUP)
667 : {
668 0 : got_SIGHUP = false;
669 0 : ProcessConfigFile(PGC_SIGHUP);
670 :
671 : /* shutdown requested in config file? */
672 0 : if (!AutoVacuumingActive())
673 0 : break;
674 :
675 : /* rebalance in case the default cost parameters changed */
676 0 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
677 0 : autovac_balance_cost();
678 0 : LWLockRelease(AutovacuumLock);
679 :
680 : /* rebuild the list in case the naptime changed */
681 0 : rebuild_database_list(InvalidOid);
682 : }
683 :
684 : /*
685 : * a worker finished, or postmaster signalled failure to start a
686 : * worker
687 : */
688 88 : if (got_SIGUSR2)
689 : {
690 7 : got_SIGUSR2 = false;
691 :
692 : /* rebalance cost limits, if needed */
693 7 : if (AutoVacuumShmem->av_signal[AutoVacRebalance])
694 : {
695 3 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
696 3 : AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
697 3 : autovac_balance_cost();
698 3 : LWLockRelease(AutovacuumLock);
699 : }
700 :
701 7 : if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
702 : {
703 : /*
704 : * If the postmaster failed to start a new worker, we sleep
705 : * for a little while and resend the signal. The new worker's
706 : * state is still in memory, so this is sufficient. After
707 : * that, we restart the main loop.
708 : *
709 : * XXX should we put a limit to the number of times we retry?
710 : * I don't think it makes much sense, because a future start
711 : * of a worker will continue to fail in the same way.
712 : */
713 0 : AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
714 0 : pg_usleep(1000000L); /* 1s */
715 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
716 0 : continue;
717 : }
718 : }
719 :
720 : /*
721 : * There are some conditions that we need to check before trying to
722 : * start a worker. First, we need to make sure that there is a worker
723 : * slot available. Second, we need to make sure that no other worker
724 : * failed while starting up.
725 : */
726 :
727 88 : current_time = GetCurrentTimestamp();
728 88 : LWLockAcquire(AutovacuumLock, LW_SHARED);
729 :
730 88 : can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);
731 :
732 88 : if (AutoVacuumShmem->av_startingWorker != NULL)
733 : {
734 : int waittime;
735 0 : WorkerInfo worker = AutoVacuumShmem->av_startingWorker;
736 :
737 : /*
738 : * We can't launch another worker when another one is still
739 : * starting up (or failed while doing so), so just sleep for a bit
740 : * more; that worker will wake us up again as soon as it's ready.
741 : * We will only wait autovacuum_naptime seconds (up to a maximum
742 : * of 60 seconds) for this to happen however. Note that failure
743 : * to connect to a particular database is not a problem here,
744 : * because the worker removes itself from the startingWorker
745 : * pointer before trying to connect. Problems detected by the
746 : * postmaster (like fork() failure) are also reported and handled
747 : * differently. The only problems that may cause this code to
748 : * fire are errors in the earlier sections of AutoVacWorkerMain,
749 : * before the worker removes the WorkerInfo from the
750 : * startingWorker pointer.
751 : */
752 0 : waittime = Min(autovacuum_naptime, 60) * 1000;
753 0 : if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
754 : waittime))
755 : {
756 0 : LWLockRelease(AutovacuumLock);
757 0 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
758 :
759 : /*
760 : * No other process can put a worker in starting mode, so if
761 : * startingWorker is still INVALID after exchanging our lock,
762 : * we assume it's the same one we saw above (so we don't
763 : * recheck the launch time).
764 : */
765 0 : if (AutoVacuumShmem->av_startingWorker != NULL)
766 : {
767 0 : worker = AutoVacuumShmem->av_startingWorker;
768 0 : worker->wi_dboid = InvalidOid;
769 0 : worker->wi_tableoid = InvalidOid;
770 0 : worker->wi_sharedrel = false;
771 0 : worker->wi_proc = NULL;
772 0 : worker->wi_launchtime = 0;
773 0 : dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
774 : &worker->wi_links);
775 0 : AutoVacuumShmem->av_startingWorker = NULL;
776 0 : elog(WARNING, "worker took too long to start; canceled");
777 : }
778 : }
779 : else
780 0 : can_launch = false;
781 : }
782 88 : LWLockRelease(AutovacuumLock); /* either shared or exclusive */
783 :
784 : /* if we can't do anything, just go back to sleep */
785 88 : if (!can_launch)
786 0 : continue;
787 :
788 : /* We're OK to start a new worker */
789 :
790 88 : if (dlist_is_empty(&DatabaseList))
791 : {
792 : /*
793 : * Special case when the list is empty: start a worker right away.
794 : * This covers the initial case, when no database is in pgstats
795 : * (thus the list is empty). Note that the constraints in
796 : * launcher_determine_sleep keep us from starting workers too
797 : * quickly (at most once every autovacuum_naptime when the list is
798 : * empty).
799 : */
800 2 : launch_worker(current_time);
801 : }
802 : else
803 : {
804 : /*
805 : * because rebuild_database_list constructs a list with most
806 : * distant adl_next_worker first, we obtain our database from the
807 : * tail of the list.
808 : */
809 : avl_dbase *avdb;
810 :
811 86 : avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
812 :
813 : /*
814 : * launch a worker if next_worker is right now or it is in the
815 : * past
816 : */
817 86 : if (TimestampDifferenceExceeds(avdb->adl_next_worker,
818 : current_time, 0))
819 3 : launch_worker(current_time);
820 : }
821 : }
822 :
823 : /* Normal exit from the autovac launcher is here */
824 : shutdown:
825 1 : ereport(DEBUG1,
826 : (errmsg("autovacuum launcher shutting down")));
827 1 : AutoVacuumShmem->av_launcherpid = 0;
828 :
829 1 : proc_exit(0); /* done */
830 : }
831 :
832 : /*
833 : * Determine the time to sleep, based on the database list.
834 : *
835 : * The "canlaunch" parameter indicates whether we can start a worker right now,
836 : * for example due to the workers being all busy. If this is false, we will
837 : * cause a long sleep, which will be interrupted when a worker exits.
838 : */
839 : static void
840 89 : launcher_determine_sleep(bool canlaunch, bool recursing, struct timeval *nap)
841 : {
842 : /*
843 : * We sleep until the next scheduled vacuum. We trust that when the
844 : * database list was built, care was taken so that no entries have times
845 : * in the past; if the first entry has too close a next_worker value, or a
846 : * time in the past, we will sleep a small nominal time.
847 : */
848 89 : if (!canlaunch)
849 : {
850 0 : nap->tv_sec = autovacuum_naptime;
851 0 : nap->tv_usec = 0;
852 : }
853 89 : else if (!dlist_is_empty(&DatabaseList))
854 : {
855 87 : TimestampTz current_time = GetCurrentTimestamp();
856 : TimestampTz next_wakeup;
857 : avl_dbase *avdb;
858 : long secs;
859 : int usecs;
860 :
861 87 : avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
862 :
863 87 : next_wakeup = avdb->adl_next_worker;
864 87 : TimestampDifference(current_time, next_wakeup, &secs, &usecs);
865 :
866 87 : nap->tv_sec = secs;
867 87 : nap->tv_usec = usecs;
868 : }
869 : else
870 : {
871 : /* list is empty, sleep for whole autovacuum_naptime seconds */
872 2 : nap->tv_sec = autovacuum_naptime;
873 2 : nap->tv_usec = 0;
874 : }
875 :
876 : /*
877 : * If the result is exactly zero, it means a database had an entry with
878 : * time in the past. Rebuild the list so that the databases are evenly
879 : * distributed again, and recalculate the time to sleep. This can happen
880 : * if there are more tables needing vacuum than workers, and they all take
881 : * longer to vacuum than autovacuum_naptime.
882 : *
883 : * We only recurse once. rebuild_database_list should always return times
884 : * in the future, but it seems best not to trust too much on that.
885 : */
886 89 : if (nap->tv_sec == 0 && nap->tv_usec == 0 && !recursing)
887 : {
888 0 : rebuild_database_list(InvalidOid);
889 0 : launcher_determine_sleep(canlaunch, true, nap);
890 89 : return;
891 : }
892 :
893 : /* The smallest time we'll allow the launcher to sleep. */
894 89 : if (nap->tv_sec <= 0 && nap->tv_usec <= MIN_AUTOVAC_SLEEPTIME * 1000)
895 : {
896 2 : nap->tv_sec = 0;
897 2 : nap->tv_usec = MIN_AUTOVAC_SLEEPTIME * 1000;
898 : }
899 :
900 : /*
901 : * If the sleep time is too large, clamp it to an arbitrary maximum (plus
902 : * any fractional seconds, for simplicity). This avoids an essentially
903 : * infinite sleep in strange cases like the system clock going backwards a
904 : * few years.
905 : */
906 89 : if (nap->tv_sec > MAX_AUTOVAC_SLEEPTIME)
907 0 : nap->tv_sec = MAX_AUTOVAC_SLEEPTIME;
908 : }
909 :
910 : /*
911 : * Build an updated DatabaseList. It must only contain databases that appear
912 : * in pgstats, and must be sorted by next_worker from highest to lowest,
913 : * distributed regularly across the next autovacuum_naptime interval.
914 : *
915 : * Receives the Oid of the database that made this list be generated (we call
916 : * this the "new" database, because when the database was already present on
917 : * the list, we expect that this function is not called at all). The
918 : * preexisting list, if any, will be used to preserve the order of the
919 : * databases in the autovacuum_naptime period. The new database is put at the
920 : * end of the interval. The actual values are not saved, which should not be
921 : * much of a problem.
922 : */
923 : static void
924 2 : rebuild_database_list(Oid newdb)
925 : {
926 : List *dblist;
927 : ListCell *cell;
928 : MemoryContext newcxt;
929 : MemoryContext oldcxt;
930 : MemoryContext tmpcxt;
931 : HASHCTL hctl;
932 : int score;
933 : int nelems;
934 : HTAB *dbhash;
935 : dlist_iter iter;
936 :
937 : /* use fresh stats */
938 2 : autovac_refresh_stats();
939 :
940 2 : newcxt = AllocSetContextCreate(AutovacMemCxt,
941 : "AV dblist",
942 : ALLOCSET_DEFAULT_SIZES);
943 2 : tmpcxt = AllocSetContextCreate(newcxt,
944 : "tmp AV dblist",
945 : ALLOCSET_DEFAULT_SIZES);
946 2 : oldcxt = MemoryContextSwitchTo(tmpcxt);
947 :
948 : /*
949 : * Implementing this is not as simple as it sounds, because we need to put
950 : * the new database at the end of the list; next the databases that were
951 : * already on the list, and finally (at the tail of the list) all the
952 : * other databases that are not on the existing list.
953 : *
954 : * To do this, we build an empty hash table of scored databases. We will
955 : * start with the lowest score (zero) for the new database, then
956 : * increasing scores for the databases in the existing list, in order, and
957 : * lastly increasing scores for all databases gotten via
958 : * get_database_list() that are not already on the hash.
959 : *
960 : * Then we will put all the hash elements into an array, sort the array by
961 : * score, and finally put the array elements into the new doubly linked
962 : * list.
963 : */
964 2 : hctl.keysize = sizeof(Oid);
965 2 : hctl.entrysize = sizeof(avl_dbase);
966 2 : hctl.hcxt = tmpcxt;
967 2 : dbhash = hash_create("db hash", 20, &hctl, /* magic number here FIXME */
968 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
969 :
970 : /* start by inserting the new database */
971 2 : score = 0;
972 2 : if (OidIsValid(newdb))
973 : {
974 : avl_dbase *db;
975 : PgStat_StatDBEntry *entry;
976 :
977 : /* only consider this database if it has a pgstat entry */
978 1 : entry = pgstat_fetch_stat_dbentry(newdb);
979 1 : if (entry != NULL)
980 : {
981 : /* we assume it isn't found because the hash was just created */
982 1 : db = hash_search(dbhash, &newdb, HASH_ENTER, NULL);
983 :
984 : /* hash_search already filled in the key */
985 1 : db->adl_score = score++;
986 : /* next_worker is filled in later */
987 : }
988 : }
989 :
990 : /* Now insert the databases from the existing list */
991 2 : dlist_foreach(iter, &DatabaseList)
992 : {
993 0 : avl_dbase *avdb = dlist_container(avl_dbase, adl_node, iter.cur);
994 : avl_dbase *db;
995 : bool found;
996 : PgStat_StatDBEntry *entry;
997 :
998 : /*
999 : * skip databases with no stat entries -- in particular, this gets rid
1000 : * of dropped databases
1001 : */
1002 0 : entry = pgstat_fetch_stat_dbentry(avdb->adl_datid);
1003 0 : if (entry == NULL)
1004 0 : continue;
1005 :
1006 0 : db = hash_search(dbhash, &(avdb->adl_datid), HASH_ENTER, &found);
1007 :
1008 0 : if (!found)
1009 : {
1010 : /* hash_search already filled in the key */
1011 0 : db->adl_score = score++;
1012 : /* next_worker is filled in later */
1013 : }
1014 : }
1015 :
1016 : /* finally, insert all qualifying databases not previously inserted */
1017 2 : dblist = get_database_list();
1018 9 : foreach(cell, dblist)
1019 : {
1020 7 : avw_dbase *avdb = lfirst(cell);
1021 : avl_dbase *db;
1022 : bool found;
1023 : PgStat_StatDBEntry *entry;
1024 :
1025 : /* only consider databases with a pgstat entry */
1026 7 : entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
1027 7 : if (entry == NULL)
1028 5 : continue;
1029 :
1030 2 : db = hash_search(dbhash, &(avdb->adw_datid), HASH_ENTER, &found);
1031 : /* only update the score if the database was not already on the hash */
1032 2 : if (!found)
1033 : {
1034 : /* hash_search already filled in the key */
1035 1 : db->adl_score = score++;
1036 : /* next_worker is filled in later */
1037 : }
1038 : }
1039 2 : nelems = score;
1040 :
1041 : /* from here on, the allocated memory belongs to the new list */
1042 2 : MemoryContextSwitchTo(newcxt);
1043 2 : dlist_init(&DatabaseList);
1044 :
1045 2 : if (nelems > 0)
1046 : {
1047 : TimestampTz current_time;
1048 : int millis_increment;
1049 : avl_dbase *dbary;
1050 : avl_dbase *db;
1051 : HASH_SEQ_STATUS seq;
1052 : int i;
1053 :
1054 : /* put all the hash elements into an array */
1055 1 : dbary = palloc(nelems * sizeof(avl_dbase));
1056 :
1057 1 : i = 0;
1058 1 : hash_seq_init(&seq, dbhash);
1059 4 : while ((db = hash_seq_search(&seq)) != NULL)
1060 2 : memcpy(&(dbary[i++]), db, sizeof(avl_dbase));
1061 :
1062 : /* sort the array */
1063 1 : qsort(dbary, nelems, sizeof(avl_dbase), db_comparator);
1064 :
1065 : /*
1066 : * Determine the time interval between databases in the schedule. If
1067 : * we see that the configured naptime would take us to sleep times
1068 : * lower than our min sleep time (which launcher_determine_sleep is
1069 : * coded not to allow), silently use a larger naptime (but don't touch
1070 : * the GUC variable).
1071 : */
1072 1 : millis_increment = 1000.0 * autovacuum_naptime / nelems;
1073 1 : if (millis_increment <= MIN_AUTOVAC_SLEEPTIME)
1074 0 : millis_increment = MIN_AUTOVAC_SLEEPTIME * 1.1;
1075 :
1076 1 : current_time = GetCurrentTimestamp();
1077 :
1078 : /*
1079 : * move the elements from the array into the dllist, setting the
1080 : * next_worker while walking the array
1081 : */
1082 3 : for (i = 0; i < nelems; i++)
1083 : {
1084 2 : avl_dbase *db = &(dbary[i]);
1085 :
1086 2 : current_time = TimestampTzPlusMilliseconds(current_time,
1087 : millis_increment);
1088 2 : db->adl_next_worker = current_time;
1089 :
1090 : /* later elements should go closer to the head of the list */
1091 2 : dlist_push_head(&DatabaseList, &db->adl_node);
1092 : }
1093 : }
1094 :
1095 : /* all done, clean up memory */
1096 2 : if (DatabaseListCxt != NULL)
1097 1 : MemoryContextDelete(DatabaseListCxt);
1098 2 : MemoryContextDelete(tmpcxt);
1099 2 : DatabaseListCxt = newcxt;
1100 2 : MemoryContextSwitchTo(oldcxt);
1101 2 : }
1102 :
1103 : /* qsort comparator for avl_dbase, using adl_score */
1104 : static int
1105 1 : db_comparator(const void *a, const void *b)
1106 : {
1107 1 : if (((const avl_dbase *) a)->adl_score == ((const avl_dbase *) b)->adl_score)
1108 0 : return 0;
1109 : else
1110 1 : return (((const avl_dbase *) a)->adl_score < ((const avl_dbase *) b)->adl_score) ? 1 : -1;
1111 : }
1112 :
1113 : /*
1114 : * do_start_worker
1115 : *
1116 : * Bare-bones procedure for starting an autovacuum worker from the launcher.
1117 : * It determines what database to work on, sets up shared memory stuff and
1118 : * signals postmaster to start the worker. It fails gracefully if invoked when
1119 : * autovacuum_workers are already active.
1120 : *
1121 : * Return value is the OID of the database that the worker is going to process,
1122 : * or InvalidOid if no worker was actually started.
1123 : */
1124 : static Oid
1125 5 : do_start_worker(void)
1126 : {
1127 : List *dblist;
1128 : ListCell *cell;
1129 : TransactionId xidForceLimit;
1130 : MultiXactId multiForceLimit;
1131 : bool for_xid_wrap;
1132 : bool for_multi_wrap;
1133 : avw_dbase *avdb;
1134 : TimestampTz current_time;
1135 5 : bool skipit = false;
1136 5 : Oid retval = InvalidOid;
1137 : MemoryContext tmpcxt,
1138 : oldcxt;
1139 :
1140 : /* return quickly when there are no free workers */
1141 5 : LWLockAcquire(AutovacuumLock, LW_SHARED);
1142 5 : if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
1143 : {
1144 0 : LWLockRelease(AutovacuumLock);
1145 0 : return InvalidOid;
1146 : }
1147 5 : LWLockRelease(AutovacuumLock);
1148 :
1149 : /*
1150 : * Create and switch to a temporary context to avoid leaking the memory
1151 : * allocated for the database list.
1152 : */
1153 5 : tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
1154 : "Start worker tmp cxt",
1155 : ALLOCSET_DEFAULT_SIZES);
1156 5 : oldcxt = MemoryContextSwitchTo(tmpcxt);
1157 :
1158 : /* use fresh stats */
1159 5 : autovac_refresh_stats();
1160 :
1161 : /* Get a list of databases */
1162 5 : dblist = get_database_list();
1163 :
1164 : /*
1165 : * Determine the oldest datfrozenxid/relfrozenxid that we will allow to
1166 : * pass without forcing a vacuum. (This limit can be tightened for
1167 : * particular tables, but not loosened.)
1168 : */
1169 5 : recentXid = ReadNewTransactionId();
1170 5 : xidForceLimit = recentXid - autovacuum_freeze_max_age;
1171 : /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
1172 : /* this can cause the limit to go backwards by 3, but that's OK */
1173 5 : if (xidForceLimit < FirstNormalTransactionId)
1174 0 : xidForceLimit -= FirstNormalTransactionId;
1175 :
1176 : /* Also determine the oldest datminmxid we will consider. */
1177 5 : recentMulti = ReadNextMultiXactId();
1178 5 : multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
1179 5 : if (multiForceLimit < FirstMultiXactId)
1180 0 : multiForceLimit -= FirstMultiXactId;
1181 :
1182 : /*
1183 : * Choose a database to connect to. We pick the database that was least
1184 : * recently auto-vacuumed, or one that needs vacuuming to prevent Xid
1185 : * wraparound-related data loss. If any db at risk of Xid wraparound is
1186 : * found, we pick the one with oldest datfrozenxid, independently of
1187 : * autovacuum times; similarly we pick the one with the oldest datminmxid
1188 : * if any is in MultiXactId wraparound. Note that those in Xid wraparound
1189 : * danger are given more priority than those in multi wraparound danger.
1190 : *
1191 : * Note that a database with no stats entry is not considered, except for
1192 : * Xid wraparound purposes. The theory is that if no one has ever
1193 : * connected to it since the stats were last initialized, it doesn't need
1194 : * vacuuming.
1195 : *
1196 : * XXX This could be improved if we had more info about whether it needs
1197 : * vacuuming before connecting to it. Perhaps look through the pgstats
1198 : * data for the database's tables? One idea is to keep track of the
1199 : * number of new and dead tuples per database in pgstats. However it
1200 : * isn't clear how to construct a metric that measures that and not cause
1201 : * starvation for less busy databases.
1202 : */
1203 5 : avdb = NULL;
1204 5 : for_xid_wrap = false;
1205 5 : for_multi_wrap = false;
1206 5 : current_time = GetCurrentTimestamp();
1207 24 : foreach(cell, dblist)
1208 : {
1209 19 : avw_dbase *tmp = lfirst(cell);
1210 : dlist_iter iter;
1211 :
1212 : /* Check to see if this one is at risk of wraparound */
1213 19 : if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
1214 : {
1215 0 : if (avdb == NULL ||
1216 0 : TransactionIdPrecedes(tmp->adw_frozenxid,
1217 : avdb->adw_frozenxid))
1218 0 : avdb = tmp;
1219 0 : for_xid_wrap = true;
1220 14 : continue;
1221 : }
1222 19 : else if (for_xid_wrap)
1223 0 : continue; /* ignore not-at-risk DBs */
1224 19 : else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1225 : {
1226 0 : if (avdb == NULL ||
1227 0 : MultiXactIdPrecedes(tmp->adw_minmulti, avdb->adw_minmulti))
1228 0 : avdb = tmp;
1229 0 : for_multi_wrap = true;
1230 0 : continue;
1231 : }
1232 19 : else if (for_multi_wrap)
1233 0 : continue; /* ignore not-at-risk DBs */
1234 :
1235 : /* Find pgstat entry if any */
1236 19 : tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
1237 :
1238 : /*
1239 : * Skip a database with no pgstat entry; it means it hasn't seen any
1240 : * activity.
1241 : */
1242 19 : if (!tmp->adw_entry)
1243 11 : continue;
1244 :
1245 : /*
1246 : * Also, skip a database that appears on the database list as having
1247 : * been processed recently (less than autovacuum_naptime seconds ago).
1248 : * We do this so that we don't select a database which we just
1249 : * selected, but that pgstat hasn't gotten around to updating the last
1250 : * autovacuum time yet.
1251 : */
1252 8 : skipit = false;
1253 :
1254 11 : dlist_reverse_foreach(iter, &DatabaseList)
1255 : {
1256 9 : avl_dbase *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
1257 :
1258 9 : if (dbp->adl_datid == tmp->adw_datid)
1259 : {
1260 : /*
1261 : * Skip this database if its next_worker value falls between
1262 : * the current time and the current time plus naptime.
1263 : */
1264 6 : if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
1265 3 : current_time, 0) &&
1266 3 : !TimestampDifferenceExceeds(current_time,
1267 : dbp->adl_next_worker,
1268 : autovacuum_naptime * 1000))
1269 3 : skipit = true;
1270 :
1271 6 : break;
1272 : }
1273 : }
1274 8 : if (skipit)
1275 3 : continue;
1276 :
1277 : /*
1278 : * Remember the db with oldest autovac time. (If we are here, both
1279 : * tmp->entry and db->entry must be non-null.)
1280 : */
1281 6 : if (avdb == NULL ||
1282 1 : tmp->adw_entry->last_autovac_time < avdb->adw_entry->last_autovac_time)
1283 4 : avdb = tmp;
1284 : }
1285 :
1286 : /* Found a database -- process it */
1287 5 : if (avdb != NULL)
1288 : {
1289 : WorkerInfo worker;
1290 : dlist_node *wptr;
1291 :
1292 4 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1293 :
1294 : /*
1295 : * Get a worker entry from the freelist. We checked above, so there
1296 : * really should be a free slot.
1297 : */
1298 4 : wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
1299 :
1300 4 : worker = dlist_container(WorkerInfoData, wi_links, wptr);
1301 4 : worker->wi_dboid = avdb->adw_datid;
1302 4 : worker->wi_proc = NULL;
1303 4 : worker->wi_launchtime = GetCurrentTimestamp();
1304 :
1305 4 : AutoVacuumShmem->av_startingWorker = worker;
1306 :
1307 4 : LWLockRelease(AutovacuumLock);
1308 :
1309 4 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
1310 :
1311 4 : retval = avdb->adw_datid;
1312 : }
1313 1 : else if (skipit)
1314 : {
1315 : /*
1316 : * If we skipped all databases on the list, rebuild it, because it
1317 : * probably contains a dropped database.
1318 : */
1319 0 : rebuild_database_list(InvalidOid);
1320 : }
1321 :
1322 5 : MemoryContextSwitchTo(oldcxt);
1323 5 : MemoryContextDelete(tmpcxt);
1324 :
1325 5 : return retval;
1326 : }
1327 :
1328 : /*
1329 : * launch_worker
1330 : *
1331 : * Wrapper for starting a worker from the launcher. Besides actually starting
1332 : * it, update the database list to reflect the next time that another one will
1333 : * need to be started on the selected database. The actual database choice is
1334 : * left to do_start_worker.
1335 : *
1336 : * This routine is also expected to insert an entry into the database list if
1337 : * the selected database was previously absent from the list.
1338 : */
1339 : static void
1340 5 : launch_worker(TimestampTz now)
1341 : {
1342 : Oid dbid;
1343 : dlist_iter iter;
1344 :
1345 5 : dbid = do_start_worker();
1346 5 : if (OidIsValid(dbid))
1347 : {
1348 4 : bool found = false;
1349 :
1350 : /*
1351 : * Walk the database list and update the corresponding entry. If the
1352 : * database is not on the list, we'll recreate the list.
1353 : */
1354 7 : dlist_foreach(iter, &DatabaseList)
1355 : {
1356 6 : avl_dbase *avdb = dlist_container(avl_dbase, adl_node, iter.cur);
1357 :
1358 6 : if (avdb->adl_datid == dbid)
1359 : {
1360 3 : found = true;
1361 :
1362 : /*
1363 : * add autovacuum_naptime seconds to the current time, and use
1364 : * that as the new "next_worker" field for this database.
1365 : */
1366 3 : avdb->adl_next_worker =
1367 3 : TimestampTzPlusMilliseconds(now, autovacuum_naptime * 1000);
1368 :
1369 3 : dlist_move_head(&DatabaseList, iter.cur);
1370 3 : break;
1371 : }
1372 : }
1373 :
1374 : /*
1375 : * If the database was not present in the database list, we rebuild
1376 : * the list. It's possible that the database does not get into the
1377 : * list anyway, for example if it's a database that doesn't have a
1378 : * pgstat entry, but this is not a problem because we don't want to
1379 : * schedule workers regularly into those in any case.
1380 : */
1381 4 : if (!found)
1382 1 : rebuild_database_list(dbid);
1383 : }
1384 5 : }
1385 :
1386 : /*
1387 : * Called from postmaster to signal a failure to fork a process to become
1388 : * worker. The postmaster should kill(SIGUSR2) the launcher shortly
1389 : * after calling this function.
1390 : */
1391 : void
1392 0 : AutoVacWorkerFailed(void)
1393 : {
1394 0 : AutoVacuumShmem->av_signal[AutoVacForkFailed] = true;
1395 0 : }
1396 :
1397 : /* SIGHUP: set flag to re-read config file at next convenient time */
1398 : static void
1399 0 : av_sighup_handler(SIGNAL_ARGS)
1400 : {
1401 0 : int save_errno = errno;
1402 :
1403 0 : got_SIGHUP = true;
1404 0 : SetLatch(MyLatch);
1405 :
1406 0 : errno = save_errno;
1407 0 : }
1408 :
1409 : /* SIGUSR2: a worker is up and running, or just finished, or failed to fork */
1410 : static void
1411 7 : avl_sigusr2_handler(SIGNAL_ARGS)
1412 : {
1413 7 : int save_errno = errno;
1414 :
1415 7 : got_SIGUSR2 = true;
1416 7 : SetLatch(MyLatch);
1417 :
1418 7 : errno = save_errno;
1419 7 : }
1420 :
1421 : /* SIGTERM: time to die */
1422 : static void
1423 1 : avl_sigterm_handler(SIGNAL_ARGS)
1424 : {
1425 1 : int save_errno = errno;
1426 :
1427 1 : got_SIGTERM = true;
1428 1 : SetLatch(MyLatch);
1429 :
1430 1 : errno = save_errno;
1431 1 : }
1432 :
1433 :
1434 : /********************************************************************
1435 : * AUTOVACUUM WORKER CODE
1436 : ********************************************************************/
1437 :
1438 : #ifdef EXEC_BACKEND
1439 : /*
1440 : * forkexec routines for the autovacuum worker.
1441 : *
1442 : * Format up the arglist, then fork and exec.
1443 : */
1444 : static pid_t
1445 : avworker_forkexec(void)
1446 : {
1447 : char *av[10];
1448 : int ac = 0;
1449 :
1450 : av[ac++] = "postgres";
1451 : av[ac++] = "--forkavworker";
1452 : av[ac++] = NULL; /* filled in by postmaster_forkexec */
1453 : av[ac] = NULL;
1454 :
1455 : Assert(ac < lengthof(av));
1456 :
1457 : return postmaster_forkexec(ac, av);
1458 : }
1459 :
1460 : /*
1461 : * We need this set from the outside, before InitProcess is called
1462 : */
1463 : void
1464 : AutovacuumWorkerIAm(void)
1465 : {
1466 : am_autovacuum_worker = true;
1467 : }
1468 : #endif
1469 :
1470 : /*
1471 : * Main entry point for autovacuum worker process.
1472 : *
1473 : * This code is heavily based on pgarch.c, q.v.
1474 : */
1475 : int
1476 4 : StartAutoVacWorker(void)
1477 : {
1478 : pid_t worker_pid;
1479 :
1480 : #ifdef EXEC_BACKEND
1481 : switch ((worker_pid = avworker_forkexec()))
1482 : #else
1483 4 : switch ((worker_pid = fork_process()))
1484 : #endif
1485 : {
1486 : case -1:
1487 0 : ereport(LOG,
1488 : (errmsg("could not fork autovacuum worker process: %m")));
1489 0 : return 0;
1490 :
1491 : #ifndef EXEC_BACKEND
1492 : case 0:
1493 : /* in postmaster child ... */
1494 4 : InitPostmasterChild();
1495 :
1496 : /* Close the postmaster's sockets */
1497 4 : ClosePostmasterPorts(false);
1498 :
1499 4 : AutoVacWorkerMain(0, NULL);
1500 : break;
1501 : #endif
1502 : default:
1503 4 : return (int) worker_pid;
1504 : }
1505 :
1506 : /* shouldn't get here */
1507 : return 0;
1508 : }
1509 :
1510 : /*
1511 : * AutoVacWorkerMain
1512 : */
1513 : NON_EXEC_STATIC void
1514 4 : AutoVacWorkerMain(int argc, char *argv[])
1515 : {
1516 : sigjmp_buf local_sigjmp_buf;
1517 : Oid dbid;
1518 :
1519 4 : am_autovacuum_worker = true;
1520 :
1521 : /* Identify myself via ps */
1522 4 : init_ps_display("autovacuum worker process", "", "", "");
1523 :
1524 4 : SetProcessingMode(InitProcessing);
1525 :
1526 : /*
1527 : * Set up signal handlers. We operate on databases much like a regular
1528 : * backend, so we use the same signal handling. See equivalent code in
1529 : * tcop/postgres.c.
1530 : */
1531 4 : pqsignal(SIGHUP, av_sighup_handler);
1532 :
1533 : /*
1534 : * SIGINT is used to signal canceling the current table's vacuum; SIGTERM
1535 : * means abort and exit cleanly, and SIGQUIT means abandon ship.
1536 : */
1537 4 : pqsignal(SIGINT, StatementCancelHandler);
1538 4 : pqsignal(SIGTERM, die);
1539 4 : pqsignal(SIGQUIT, quickdie);
1540 4 : InitializeTimeouts(); /* establishes SIGALRM handler */
1541 :
1542 4 : pqsignal(SIGPIPE, SIG_IGN);
1543 4 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
1544 4 : pqsignal(SIGUSR2, SIG_IGN);
1545 4 : pqsignal(SIGFPE, FloatExceptionHandler);
1546 4 : pqsignal(SIGCHLD, SIG_DFL);
1547 :
1548 : /* Early initialization */
1549 4 : BaseInit();
1550 :
1551 : /*
1552 : * Create a per-backend PGPROC struct in shared memory, except in the
1553 : * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
1554 : * this before we can use LWLocks (and in the EXEC_BACKEND case we already
1555 : * had to do some stuff with LWLocks).
1556 : */
1557 : #ifndef EXEC_BACKEND
1558 4 : InitProcess();
1559 : #endif
1560 :
1561 : /*
1562 : * If an exception is encountered, processing resumes here.
1563 : *
1564 : * See notes in postgres.c about the design of this coding.
1565 : */
1566 4 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1567 : {
1568 : /* Prevents interrupts while cleaning up */
1569 0 : HOLD_INTERRUPTS();
1570 :
1571 : /* Report the error to the server log */
1572 0 : EmitErrorReport();
1573 :
1574 : /*
1575 : * We can now go away. Note that because we called InitProcess, a
1576 : * callback was registered to do ProcKill, which will clean up
1577 : * necessary state.
1578 : */
1579 0 : proc_exit(0);
1580 : }
1581 :
1582 : /* We can now handle ereport(ERROR) */
1583 4 : PG_exception_stack = &local_sigjmp_buf;
1584 :
1585 4 : PG_SETMASK(&UnBlockSig);
1586 :
1587 : /*
1588 : * Force zero_damaged_pages OFF in the autovac process, even if it is set
1589 : * in postgresql.conf. We don't really want such a dangerous option being
1590 : * applied non-interactively.
1591 : */
1592 4 : SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
1593 :
1594 : /*
1595 : * Force settable timeouts off to avoid letting these settings prevent
1596 : * regular maintenance from being executed.
1597 : */
1598 4 : SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
1599 4 : SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
1600 4 : SetConfigOption("idle_in_transaction_session_timeout", "0",
1601 : PGC_SUSET, PGC_S_OVERRIDE);
1602 :
1603 : /*
1604 : * Force default_transaction_isolation to READ COMMITTED. We don't want
1605 : * to pay the overhead of serializable mode, nor add any risk of causing
1606 : * deadlocks or delaying other transactions.
1607 : */
1608 4 : SetConfigOption("default_transaction_isolation", "read committed",
1609 : PGC_SUSET, PGC_S_OVERRIDE);
1610 :
1611 : /*
1612 : * Force synchronous replication off to allow regular maintenance even if
1613 : * we are waiting for standbys to connect. This is important to ensure we
1614 : * aren't blocked from performing anti-wraparound tasks.
1615 : */
1616 4 : if (synchronous_commit > SYNCHRONOUS_COMMIT_LOCAL_FLUSH)
1617 4 : SetConfigOption("synchronous_commit", "local",
1618 : PGC_SUSET, PGC_S_OVERRIDE);
1619 :
1620 : /*
1621 : * Get the info about the database we're going to work on.
1622 : */
1623 4 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1624 :
1625 : /*
1626 : * beware of startingWorker being INVALID; this should normally not
1627 : * happen, but if a worker fails after forking and before this, the
1628 : * launcher might have decided to remove it from the queue and start
1629 : * again.
1630 : */
1631 4 : if (AutoVacuumShmem->av_startingWorker != NULL)
1632 : {
1633 4 : MyWorkerInfo = AutoVacuumShmem->av_startingWorker;
1634 4 : dbid = MyWorkerInfo->wi_dboid;
1635 4 : MyWorkerInfo->wi_proc = MyProc;
1636 :
1637 : /* insert into the running list */
1638 4 : dlist_push_head(&AutoVacuumShmem->av_runningWorkers,
1639 4 : &MyWorkerInfo->wi_links);
1640 :
1641 : /*
1642 : * remove from the "starting" pointer, so that the launcher can start
1643 : * a new worker if required
1644 : */
1645 4 : AutoVacuumShmem->av_startingWorker = NULL;
1646 4 : LWLockRelease(AutovacuumLock);
1647 :
1648 4 : on_shmem_exit(FreeWorkerInfo, 0);
1649 :
1650 : /* wake up the launcher */
1651 4 : if (AutoVacuumShmem->av_launcherpid != 0)
1652 4 : kill(AutoVacuumShmem->av_launcherpid, SIGUSR2);
1653 : }
1654 : else
1655 : {
1656 : /* no worker entry for me, go away */
1657 0 : elog(WARNING, "autovacuum worker started without a worker entry");
1658 0 : dbid = InvalidOid;
1659 0 : LWLockRelease(AutovacuumLock);
1660 : }
1661 :
1662 4 : if (OidIsValid(dbid))
1663 : {
1664 : char dbname[NAMEDATALEN];
1665 :
1666 : /*
1667 : * Report autovac startup to the stats collector. We deliberately do
1668 : * this before InitPostgres, so that the last_autovac_time will get
1669 : * updated even if the connection attempt fails. This is to prevent
1670 : * autovac from getting "stuck" repeatedly selecting an unopenable
1671 : * database, rather than making any progress on stuff it can connect
1672 : * to.
1673 : */
1674 4 : pgstat_report_autovac(dbid);
1675 :
1676 : /*
1677 : * Connect to the selected database
1678 : *
1679 : * Note: if we have selected a just-deleted database (due to using
1680 : * stale stats info), we'll fail and exit here.
1681 : */
1682 4 : InitPostgres(NULL, dbid, NULL, InvalidOid, dbname);
1683 4 : SetProcessingMode(NormalProcessing);
1684 4 : set_ps_display(dbname, false);
1685 4 : ereport(DEBUG1,
1686 : (errmsg("autovacuum: processing database \"%s\"", dbname)));
1687 :
1688 4 : if (PostAuthDelay)
1689 0 : pg_usleep(PostAuthDelay * 1000000L);
1690 :
1691 : /* And do an appropriate amount of work */
1692 4 : recentXid = ReadNewTransactionId();
1693 4 : recentMulti = ReadNextMultiXactId();
1694 4 : do_autovacuum();
1695 : }
1696 :
1697 : /*
1698 : * The launcher will be notified of my death in ProcKill, *if* we managed
1699 : * to get a worker slot at all
1700 : */
1701 :
1702 : /* All done, go away */
1703 3 : proc_exit(0);
1704 : }
1705 :
1706 : /*
1707 : * Return a WorkerInfo to the free list
1708 : */
1709 : static void
1710 4 : FreeWorkerInfo(int code, Datum arg)
1711 : {
1712 4 : if (MyWorkerInfo != NULL)
1713 : {
1714 4 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1715 :
1716 : /*
1717 : * Wake the launcher up so that he can launch a new worker immediately
1718 : * if required. We only save the launcher's PID in local memory here;
1719 : * the actual signal will be sent when the PGPROC is recycled. Note
1720 : * that we always do this, so that the launcher can rebalance the cost
1721 : * limit setting of the remaining workers.
1722 : *
1723 : * We somewhat ignore the risk that the launcher changes its PID
1724 : * between us reading it and the actual kill; we expect ProcKill to be
1725 : * called shortly after us, and we assume that PIDs are not reused too
1726 : * quickly after a process exits.
1727 : */
1728 4 : AutovacuumLauncherPid = AutoVacuumShmem->av_launcherpid;
1729 :
1730 4 : dlist_delete(&MyWorkerInfo->wi_links);
1731 4 : MyWorkerInfo->wi_dboid = InvalidOid;
1732 4 : MyWorkerInfo->wi_tableoid = InvalidOid;
1733 4 : MyWorkerInfo->wi_sharedrel = false;
1734 4 : MyWorkerInfo->wi_proc = NULL;
1735 4 : MyWorkerInfo->wi_launchtime = 0;
1736 4 : MyWorkerInfo->wi_dobalance = false;
1737 4 : MyWorkerInfo->wi_cost_delay = 0;
1738 4 : MyWorkerInfo->wi_cost_limit = 0;
1739 4 : MyWorkerInfo->wi_cost_limit_base = 0;
1740 4 : dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
1741 4 : &MyWorkerInfo->wi_links);
1742 : /* not mine anymore */
1743 4 : MyWorkerInfo = NULL;
1744 :
1745 : /*
1746 : * now that we're inactive, cause a rebalancing of the surviving
1747 : * workers
1748 : */
1749 4 : AutoVacuumShmem->av_signal[AutoVacRebalance] = true;
1750 4 : LWLockRelease(AutovacuumLock);
1751 : }
1752 4 : }
1753 :
1754 : /*
1755 : * Update the cost-based delay parameters, so that multiple workers consume
1756 : * each a fraction of the total available I/O.
1757 : */
1758 : void
1759 62 : AutoVacuumUpdateDelay(void)
1760 : {
1761 62 : if (MyWorkerInfo)
1762 : {
1763 62 : VacuumCostDelay = MyWorkerInfo->wi_cost_delay;
1764 62 : VacuumCostLimit = MyWorkerInfo->wi_cost_limit;
1765 : }
1766 62 : }
1767 :
1768 : /*
1769 : * autovac_balance_cost
1770 : * Recalculate the cost limit setting for each active worker.
1771 : *
1772 : * Caller must hold the AutovacuumLock in exclusive mode.
1773 : */
1774 : static void
1775 49 : autovac_balance_cost(void)
1776 : {
1777 : /*
1778 : * The idea here is that we ration out I/O equally. The amount of I/O
1779 : * that a worker can consume is determined by cost_limit/cost_delay, so we
1780 : * try to equalize those ratios rather than the raw limit settings.
1781 : *
1782 : * note: in cost_limit, zero also means use value from elsewhere, because
1783 : * zero is not a valid value.
1784 : */
1785 98 : int vac_cost_limit = (autovacuum_vac_cost_limit > 0 ?
1786 49 : autovacuum_vac_cost_limit : VacuumCostLimit);
1787 98 : int vac_cost_delay = (autovacuum_vac_cost_delay >= 0 ?
1788 49 : autovacuum_vac_cost_delay : VacuumCostDelay);
1789 : double cost_total;
1790 : double cost_avail;
1791 : dlist_iter iter;
1792 :
1793 : /* not set? nothing to do */
1794 49 : if (vac_cost_limit <= 0 || vac_cost_delay <= 0)
1795 3 : return;
1796 :
1797 : /* calculate the total base cost limit of participating active workers */
1798 49 : cost_total = 0.0;
1799 95 : dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
1800 : {
1801 46 : WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
1802 :
1803 92 : if (worker->wi_proc != NULL &&
1804 92 : worker->wi_dobalance &&
1805 92 : worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
1806 138 : cost_total +=
1807 92 : (double) worker->wi_cost_limit_base / worker->wi_cost_delay;
1808 : }
1809 :
1810 : /* there are no cost limits -- nothing to do */
1811 49 : if (cost_total <= 0)
1812 3 : return;
1813 :
1814 : /*
1815 : * Adjust cost limit of each active worker to balance the total of cost
1816 : * limit to autovacuum_vacuum_cost_limit.
1817 : */
1818 46 : cost_avail = (double) vac_cost_limit / vac_cost_delay;
1819 92 : dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
1820 : {
1821 46 : WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
1822 :
1823 92 : if (worker->wi_proc != NULL &&
1824 92 : worker->wi_dobalance &&
1825 92 : worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
1826 : {
1827 92 : int limit = (int)
1828 46 : (cost_avail * worker->wi_cost_limit_base / cost_total);
1829 :
1830 : /*
1831 : * We put a lower bound of 1 on the cost_limit, to avoid division-
1832 : * by-zero in the vacuum code. Also, in case of roundoff trouble
1833 : * in these calculations, let's be sure we don't ever set
1834 : * cost_limit to more than the base value.
1835 : */
1836 46 : worker->wi_cost_limit = Max(Min(limit,
1837 : worker->wi_cost_limit_base),
1838 : 1);
1839 : }
1840 :
1841 46 : if (worker->wi_proc != NULL)
1842 46 : elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, dobalance=%s cost_limit=%d, cost_limit_base=%d, cost_delay=%d)",
1843 : worker->wi_proc->pid, worker->wi_dboid, worker->wi_tableoid,
1844 : worker->wi_dobalance ? "yes" : "no",
1845 : worker->wi_cost_limit, worker->wi_cost_limit_base,
1846 : worker->wi_cost_delay);
1847 : }
1848 : }
1849 :
1850 : /*
1851 : * get_database_list
1852 : * Return a list of all databases found in pg_database.
1853 : *
1854 : * The list and associated data is allocated in the caller's memory context,
1855 : * which is in charge of ensuring that it's properly cleaned up afterwards.
1856 : *
1857 : * Note: this is the only function in which the autovacuum launcher uses a
1858 : * transaction. Although we aren't attached to any particular database and
1859 : * therefore can't access most catalogs, we do have enough infrastructure
1860 : * to do a seqscan on pg_database.
1861 : */
1862 : static List *
1863 7 : get_database_list(void)
1864 : {
1865 7 : List *dblist = NIL;
1866 : Relation rel;
1867 : HeapScanDesc scan;
1868 : HeapTuple tup;
1869 : MemoryContext resultcxt;
1870 :
1871 : /* This is the context that we will allocate our output data in */
1872 7 : resultcxt = CurrentMemoryContext;
1873 :
1874 : /*
1875 : * Start a transaction so we can access pg_database, and get a snapshot.
1876 : * We don't have a use for the snapshot itself, but we're interested in
1877 : * the secondary effect that it sets RecentGlobalXmin. (This is critical
1878 : * for anything that reads heap pages, because HOT may decide to prune
1879 : * them even if the process doesn't attempt to modify any tuples.)
1880 : */
1881 7 : StartTransactionCommand();
1882 7 : (void) GetTransactionSnapshot();
1883 :
1884 7 : rel = heap_open(DatabaseRelationId, AccessShareLock);
1885 7 : scan = heap_beginscan_catalog(rel, 0, NULL);
1886 :
1887 40 : while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
1888 : {
1889 26 : Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup);
1890 : avw_dbase *avdb;
1891 : MemoryContext oldcxt;
1892 :
1893 : /*
1894 : * Allocate our results in the caller's context, not the
1895 : * transaction's. We do this inside the loop, and restore the original
1896 : * context at the end, so that leaky things like heap_getnext() are
1897 : * not called in a potentially long-lived context.
1898 : */
1899 26 : oldcxt = MemoryContextSwitchTo(resultcxt);
1900 :
1901 26 : avdb = (avw_dbase *) palloc(sizeof(avw_dbase));
1902 :
1903 26 : avdb->adw_datid = HeapTupleGetOid(tup);
1904 26 : avdb->adw_name = pstrdup(NameStr(pgdatabase->datname));
1905 26 : avdb->adw_frozenxid = pgdatabase->datfrozenxid;
1906 26 : avdb->adw_minmulti = pgdatabase->datminmxid;
1907 : /* this gets set later: */
1908 26 : avdb->adw_entry = NULL;
1909 :
1910 26 : dblist = lappend(dblist, avdb);
1911 26 : MemoryContextSwitchTo(oldcxt);
1912 : }
1913 :
1914 7 : heap_endscan(scan);
1915 7 : heap_close(rel, AccessShareLock);
1916 :
1917 7 : CommitTransactionCommand();
1918 :
1919 7 : return dblist;
1920 : }
1921 :
1922 : /*
1923 : * Process a database table-by-table
1924 : *
1925 : * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
1926 : * order not to ignore shutdown commands for too long.
1927 : */
1928 : static void
1929 4 : do_autovacuum(void)
1930 : {
1931 : Relation classRel;
1932 : HeapTuple tuple;
1933 : HeapScanDesc relScan;
1934 : Form_pg_database dbForm;
1935 4 : List *table_oids = NIL;
1936 4 : List *orphan_oids = NIL;
1937 : HASHCTL ctl;
1938 : HTAB *table_toast_map;
1939 : ListCell *volatile cell;
1940 : PgStat_StatDBEntry *shared;
1941 : PgStat_StatDBEntry *dbentry;
1942 : BufferAccessStrategy bstrategy;
1943 : ScanKeyData key;
1944 : TupleDesc pg_class_desc;
1945 : int effective_multixact_freeze_max_age;
1946 4 : bool did_vacuum = false;
1947 4 : bool found_concurrent_worker = false;
1948 : int i;
1949 :
1950 : /*
1951 : * StartTransactionCommand and CommitTransactionCommand will automatically
1952 : * switch to other contexts. We need this one to keep the list of
1953 : * relations to vacuum/analyze across transactions.
1954 : */
1955 4 : AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
1956 : "AV worker",
1957 : ALLOCSET_DEFAULT_SIZES);
1958 4 : MemoryContextSwitchTo(AutovacMemCxt);
1959 :
1960 : /*
1961 : * may be NULL if we couldn't find an entry (only happens if we are
1962 : * forcing a vacuum for anti-wrap purposes).
1963 : */
1964 4 : dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
1965 :
1966 : /* Start a transaction so our commands have one to play into. */
1967 4 : StartTransactionCommand();
1968 :
1969 : /*
1970 : * Clean up any dead statistics collector entries for this DB. We always
1971 : * want to do this exactly once per DB-processing cycle, even if we find
1972 : * nothing worth vacuuming in the database.
1973 : */
1974 4 : pgstat_vacuum_stat();
1975 :
1976 : /*
1977 : * Compute the multixact age for which freezing is urgent. This is
1978 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1979 : * short of multixact member space.
1980 : */
1981 4 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1982 :
1983 : /*
1984 : * Find the pg_database entry and select the default freeze ages. We use
1985 : * zero in template and nonconnectable databases, else the system-wide
1986 : * default.
1987 : */
1988 4 : tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1989 4 : if (!HeapTupleIsValid(tuple))
1990 0 : elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
1991 4 : dbForm = (Form_pg_database) GETSTRUCT(tuple);
1992 :
1993 4 : if (dbForm->datistemplate || !dbForm->datallowconn)
1994 : {
1995 0 : default_freeze_min_age = 0;
1996 0 : default_freeze_table_age = 0;
1997 0 : default_multixact_freeze_min_age = 0;
1998 0 : default_multixact_freeze_table_age = 0;
1999 : }
2000 : else
2001 : {
2002 4 : default_freeze_min_age = vacuum_freeze_min_age;
2003 4 : default_freeze_table_age = vacuum_freeze_table_age;
2004 4 : default_multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
2005 4 : default_multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
2006 : }
2007 :
2008 4 : ReleaseSysCache(tuple);
2009 :
2010 : /* StartTransactionCommand changed elsewhere */
2011 4 : MemoryContextSwitchTo(AutovacMemCxt);
2012 :
2013 : /* The database hash where pgstat keeps shared relations */
2014 4 : shared = pgstat_fetch_stat_dbentry(InvalidOid);
2015 :
2016 4 : classRel = heap_open(RelationRelationId, AccessShareLock);
2017 :
2018 : /* create a copy so we can use it after closing pg_class */
2019 4 : pg_class_desc = CreateTupleDescCopy(RelationGetDescr(classRel));
2020 :
2021 : /* create hash table for toast <-> main relid mapping */
2022 4 : MemSet(&ctl, 0, sizeof(ctl));
2023 4 : ctl.keysize = sizeof(Oid);
2024 4 : ctl.entrysize = sizeof(av_relation);
2025 :
2026 4 : table_toast_map = hash_create("TOAST to main relid map",
2027 : 100,
2028 : &ctl,
2029 : HASH_ELEM | HASH_BLOBS);
2030 :
2031 : /*
2032 : * Scan pg_class to determine which tables to vacuum.
2033 : *
2034 : * We do this in two passes: on the first one we collect the list of plain
2035 : * relations and materialized views, and on the second one we collect
2036 : * TOAST tables. The reason for doing the second pass is that during it we
2037 : * want to use the main relation's pg_class.reloptions entry if the TOAST
2038 : * table does not have any, and we cannot obtain it unless we know
2039 : * beforehand what's the main table OID.
2040 : *
2041 : * We need to check TOAST tables separately because in cases with short,
2042 : * wide tables there might be proportionally much more activity in the
2043 : * TOAST table than in its parent.
2044 : */
2045 4 : relScan = heap_beginscan_catalog(classRel, 0, NULL);
2046 :
2047 : /*
2048 : * On the first pass, we collect main tables to vacuum, and also the main
2049 : * table relid to TOAST relid mapping.
2050 : */
2051 2435 : while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
2052 : {
2053 2427 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
2054 : PgStat_StatTabEntry *tabentry;
2055 : AutoVacOpts *relopts;
2056 : Oid relid;
2057 : bool dovacuum;
2058 : bool doanalyze;
2059 : bool wraparound;
2060 :
2061 4185 : if (classForm->relkind != RELKIND_RELATION &&
2062 1758 : classForm->relkind != RELKIND_MATVIEW)
2063 3504 : continue;
2064 :
2065 675 : relid = HeapTupleGetOid(tuple);
2066 :
2067 : /*
2068 : * Check if it is a temp table (presumably, of some other backend's).
2069 : * We cannot safely process other backends' temp tables.
2070 : */
2071 675 : if (classForm->relpersistence == RELPERSISTENCE_TEMP)
2072 : {
2073 : int backendID;
2074 :
2075 0 : backendID = GetTempNamespaceBackendId(classForm->relnamespace);
2076 :
2077 : /* We just ignore it if the owning backend is still active */
2078 0 : if (backendID != InvalidBackendId &&
2079 0 : (backendID == MyBackendId ||
2080 0 : BackendIdGetProc(backendID) == NULL))
2081 : {
2082 : /*
2083 : * The table seems to be orphaned -- although it might be that
2084 : * the owning backend has already deleted it and exited; our
2085 : * pg_class scan snapshot is not necessarily up-to-date
2086 : * anymore, so we could be looking at a committed-dead entry.
2087 : * Remember it so we can try to delete it later.
2088 : */
2089 0 : orphan_oids = lappend_oid(orphan_oids, relid);
2090 : }
2091 0 : continue;
2092 : }
2093 :
2094 : /* Fetch reloptions and the pgstat entry for this table */
2095 675 : relopts = extract_autovac_opts(tuple, pg_class_desc);
2096 675 : tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2097 : shared, dbentry);
2098 :
2099 : /* Check if it needs vacuum or analyze */
2100 675 : relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
2101 : effective_multixact_freeze_max_age,
2102 : &dovacuum, &doanalyze, &wraparound);
2103 :
2104 : /* Relations that need work are added to table_oids */
2105 675 : if (dovacuum || doanalyze)
2106 77 : table_oids = lappend_oid(table_oids, relid);
2107 :
2108 : /*
2109 : * Remember TOAST associations for the second pass. Note: we must do
2110 : * this whether or not the table is going to be vacuumed, because we
2111 : * don't automatically vacuum toast tables along the parent table.
2112 : */
2113 675 : if (OidIsValid(classForm->reltoastrelid))
2114 : {
2115 : av_relation *hentry;
2116 : bool found;
2117 :
2118 268 : hentry = hash_search(table_toast_map,
2119 268 : &classForm->reltoastrelid,
2120 : HASH_ENTER, &found);
2121 :
2122 268 : if (!found)
2123 : {
2124 : /* hash_search already filled in the key */
2125 268 : hentry->ar_relid = relid;
2126 268 : hentry->ar_hasrelopts = false;
2127 268 : if (relopts != NULL)
2128 : {
2129 3 : hentry->ar_hasrelopts = true;
2130 3 : memcpy(&hentry->ar_reloptions, relopts,
2131 : sizeof(AutoVacOpts));
2132 : }
2133 : }
2134 : }
2135 : }
2136 :
2137 4 : heap_endscan(relScan);
2138 :
2139 : /* second pass: check TOAST tables */
2140 4 : ScanKeyInit(&key,
2141 : Anum_pg_class_relkind,
2142 : BTEqualStrategyNumber, F_CHAREQ,
2143 : CharGetDatum(RELKIND_TOASTVALUE));
2144 :
2145 4 : relScan = heap_beginscan_catalog(classRel, 1, &key);
2146 282 : while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
2147 : {
2148 274 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
2149 : PgStat_StatTabEntry *tabentry;
2150 : Oid relid;
2151 274 : AutoVacOpts *relopts = NULL;
2152 : bool dovacuum;
2153 : bool doanalyze;
2154 : bool wraparound;
2155 :
2156 : /*
2157 : * We cannot safely process other backends' temp tables, so skip 'em.
2158 : */
2159 274 : if (classForm->relpersistence == RELPERSISTENCE_TEMP)
2160 0 : continue;
2161 :
2162 274 : relid = HeapTupleGetOid(tuple);
2163 :
2164 : /*
2165 : * fetch reloptions -- if this toast table does not have them, try the
2166 : * main rel
2167 : */
2168 274 : relopts = extract_autovac_opts(tuple, pg_class_desc);
2169 274 : if (relopts == NULL)
2170 : {
2171 : av_relation *hentry;
2172 : bool found;
2173 :
2174 274 : hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found);
2175 274 : if (found && hentry->ar_hasrelopts)
2176 3 : relopts = &hentry->ar_reloptions;
2177 : }
2178 :
2179 : /* Fetch the pgstat entry for this table */
2180 274 : tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2181 : shared, dbentry);
2182 :
2183 274 : relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
2184 : effective_multixact_freeze_max_age,
2185 : &dovacuum, &doanalyze, &wraparound);
2186 :
2187 : /* ignore analyze for toast tables */
2188 274 : if (dovacuum)
2189 0 : table_oids = lappend_oid(table_oids, relid);
2190 : }
2191 :
2192 4 : heap_endscan(relScan);
2193 4 : heap_close(classRel, AccessShareLock);
2194 :
2195 : /*
2196 : * Recheck orphan temporary tables, and if they still seem orphaned, drop
2197 : * them. We'll eat a transaction per dropped table, which might seem
2198 : * excessive, but we should only need to do anything as a result of a
2199 : * previous backend crash, so this should not happen often enough to
2200 : * justify "optimizing". Using separate transactions ensures that we
2201 : * don't bloat the lock table if there are many temp tables to be dropped,
2202 : * and it ensures that we don't lose work if a deletion attempt fails.
2203 : */
2204 4 : foreach(cell, orphan_oids)
2205 : {
2206 0 : Oid relid = lfirst_oid(cell);
2207 : Form_pg_class classForm;
2208 : int backendID;
2209 : ObjectAddress object;
2210 :
2211 : /*
2212 : * Check for user-requested abort.
2213 : */
2214 0 : CHECK_FOR_INTERRUPTS();
2215 :
2216 : /*
2217 : * Try to lock the table. If we can't get the lock immediately,
2218 : * somebody else is using (or dropping) the table, so it's not our
2219 : * concern anymore. Having the lock prevents race conditions below.
2220 : */
2221 0 : if (!ConditionalLockRelationOid(relid, AccessExclusiveLock))
2222 0 : continue;
2223 :
2224 : /*
2225 : * Re-fetch the pg_class tuple and re-check whether it still seems to
2226 : * be an orphaned temp table. If it's not there or no longer the same
2227 : * relation, ignore it.
2228 : */
2229 0 : tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2230 0 : if (!HeapTupleIsValid(tuple))
2231 : {
2232 : /* be sure to drop useless lock so we don't bloat lock table */
2233 0 : UnlockRelationOid(relid, AccessExclusiveLock);
2234 0 : continue;
2235 : }
2236 0 : classForm = (Form_pg_class) GETSTRUCT(tuple);
2237 :
2238 : /*
2239 : * Make all the same tests made in the loop above. In event of OID
2240 : * counter wraparound, the pg_class entry we have now might be
2241 : * completely unrelated to the one we saw before.
2242 : */
2243 0 : if (!((classForm->relkind == RELKIND_RELATION ||
2244 0 : classForm->relkind == RELKIND_MATVIEW) &&
2245 0 : classForm->relpersistence == RELPERSISTENCE_TEMP))
2246 : {
2247 0 : UnlockRelationOid(relid, AccessExclusiveLock);
2248 0 : continue;
2249 : }
2250 0 : backendID = GetTempNamespaceBackendId(classForm->relnamespace);
2251 0 : if (!(backendID != InvalidBackendId &&
2252 0 : (backendID == MyBackendId ||
2253 0 : BackendIdGetProc(backendID) == NULL)))
2254 : {
2255 0 : UnlockRelationOid(relid, AccessExclusiveLock);
2256 0 : continue;
2257 : }
2258 :
2259 : /* OK, let's delete it */
2260 0 : ereport(LOG,
2261 : (errmsg("autovacuum: dropping orphan temp table \"%s.%s.%s\"",
2262 : get_database_name(MyDatabaseId),
2263 : get_namespace_name(classForm->relnamespace),
2264 : NameStr(classForm->relname))));
2265 :
2266 0 : object.classId = RelationRelationId;
2267 0 : object.objectId = relid;
2268 0 : object.objectSubId = 0;
2269 0 : performDeletion(&object, DROP_CASCADE,
2270 : PERFORM_DELETION_INTERNAL |
2271 : PERFORM_DELETION_QUIETLY |
2272 : PERFORM_DELETION_SKIP_EXTENSIONS);
2273 :
2274 : /*
2275 : * To commit the deletion, end current transaction and start a new
2276 : * one. Note this also releases the lock we took.
2277 : */
2278 0 : CommitTransactionCommand();
2279 0 : StartTransactionCommand();
2280 :
2281 : /* StartTransactionCommand changed current memory context */
2282 0 : MemoryContextSwitchTo(AutovacMemCxt);
2283 : }
2284 :
2285 : /*
2286 : * Create a buffer access strategy object for VACUUM to use. We want to
2287 : * use the same one across all the vacuum operations we perform, since the
2288 : * point is for VACUUM not to blow out the shared cache.
2289 : */
2290 4 : bstrategy = GetAccessStrategy(BAS_VACUUM);
2291 :
2292 : /*
2293 : * create a memory context to act as fake PortalContext, so that the
2294 : * contexts created in the vacuum code are cleaned up for each table.
2295 : */
2296 4 : PortalContext = AllocSetContextCreate(AutovacMemCxt,
2297 : "Autovacuum Portal",
2298 : ALLOCSET_DEFAULT_SIZES);
2299 :
2300 : /*
2301 : * Perform operations on collected tables.
2302 : */
2303 50 : foreach(cell, table_oids)
2304 : {
2305 47 : Oid relid = lfirst_oid(cell);
2306 : autovac_table *tab;
2307 : bool skipit;
2308 : int stdVacuumCostDelay;
2309 : int stdVacuumCostLimit;
2310 : dlist_iter iter;
2311 :
2312 47 : CHECK_FOR_INTERRUPTS();
2313 :
2314 : /*
2315 : * Check for config changes before processing each collected table.
2316 : */
2317 47 : if (got_SIGHUP)
2318 : {
2319 0 : got_SIGHUP = false;
2320 0 : ProcessConfigFile(PGC_SIGHUP);
2321 :
2322 : /*
2323 : * You might be tempted to bail out if we see autovacuum is now
2324 : * disabled. Must resist that temptation -- this might be a
2325 : * for-wraparound emergency worker, in which case that would be
2326 : * entirely inappropriate.
2327 : */
2328 : }
2329 :
2330 : /*
2331 : * hold schedule lock from here until we're sure that this table still
2332 : * needs vacuuming. We also need the AutovacuumLock to walk the
2333 : * worker array, but we'll let go of that one quickly.
2334 : */
2335 47 : LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE);
2336 47 : LWLockAcquire(AutovacuumLock, LW_SHARED);
2337 :
2338 : /*
2339 : * Check whether the table is being vacuumed concurrently by another
2340 : * worker.
2341 : */
2342 47 : skipit = false;
2343 94 : dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
2344 : {
2345 47 : WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
2346 :
2347 : /* ignore myself */
2348 47 : if (worker == MyWorkerInfo)
2349 47 : continue;
2350 :
2351 : /* ignore workers in other databases (unless table is shared) */
2352 0 : if (!worker->wi_sharedrel && worker->wi_dboid != MyDatabaseId)
2353 0 : continue;
2354 :
2355 0 : if (worker->wi_tableoid == relid)
2356 : {
2357 0 : skipit = true;
2358 0 : found_concurrent_worker = true;
2359 0 : break;
2360 : }
2361 : }
2362 47 : LWLockRelease(AutovacuumLock);
2363 47 : if (skipit)
2364 : {
2365 0 : LWLockRelease(AutovacuumScheduleLock);
2366 1 : continue;
2367 : }
2368 :
2369 : /*
2370 : * Check whether pgstat data still says we need to vacuum this table.
2371 : * It could have changed if something else processed the table while
2372 : * we weren't looking.
2373 : *
2374 : * Note: we have a special case in pgstat code to ensure that the
2375 : * stats we read are as up-to-date as possible, to avoid the problem
2376 : * that somebody just finished vacuuming this table. The window to
2377 : * the race condition is not closed but it is very small.
2378 : */
2379 47 : MemoryContextSwitchTo(AutovacMemCxt);
2380 47 : tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc,
2381 : effective_multixact_freeze_max_age);
2382 47 : if (tab == NULL)
2383 : {
2384 : /* someone else vacuumed the table, or it went away */
2385 1 : LWLockRelease(AutovacuumScheduleLock);
2386 1 : continue;
2387 : }
2388 :
2389 : /*
2390 : * Ok, good to go. Store the table in shared memory before releasing
2391 : * the lock so that other workers don't vacuum it concurrently.
2392 : */
2393 46 : MyWorkerInfo->wi_tableoid = relid;
2394 46 : MyWorkerInfo->wi_sharedrel = tab->at_sharedrel;
2395 46 : LWLockRelease(AutovacuumScheduleLock);
2396 :
2397 : /*
2398 : * Remember the prevailing values of the vacuum cost GUCs. We have to
2399 : * restore these at the bottom of the loop, else we'll compute wrong
2400 : * values in the next iteration of autovac_balance_cost().
2401 : */
2402 46 : stdVacuumCostDelay = VacuumCostDelay;
2403 46 : stdVacuumCostLimit = VacuumCostLimit;
2404 :
2405 : /* Must hold AutovacuumLock while mucking with cost balance info */
2406 46 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2407 :
2408 : /* advertise my cost delay parameters for the balancing algorithm */
2409 46 : MyWorkerInfo->wi_dobalance = tab->at_dobalance;
2410 46 : MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay;
2411 46 : MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit;
2412 46 : MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit;
2413 :
2414 : /* do a balance */
2415 46 : autovac_balance_cost();
2416 :
2417 : /* set the active cost parameters from the result of that */
2418 46 : AutoVacuumUpdateDelay();
2419 :
2420 : /* done */
2421 46 : LWLockRelease(AutovacuumLock);
2422 :
2423 : /* clean up memory before each iteration */
2424 46 : MemoryContextResetAndDeleteChildren(PortalContext);
2425 :
2426 : /*
2427 : * Save the relation name for a possible error message, to avoid a
2428 : * catalog lookup in case of an error. If any of these return NULL,
2429 : * then the relation has been dropped since last we checked; skip it.
2430 : * Note: they must live in a long-lived memory context because we call
2431 : * vacuum and analyze in different transactions.
2432 : */
2433 :
2434 46 : tab->at_relname = get_rel_name(tab->at_relid);
2435 46 : tab->at_nspname = get_namespace_name(get_rel_namespace(tab->at_relid));
2436 46 : tab->at_datname = get_database_name(MyDatabaseId);
2437 46 : if (!tab->at_relname || !tab->at_nspname || !tab->at_datname)
2438 : goto deleted;
2439 :
2440 : /*
2441 : * We will abort vacuuming the current table if something errors out,
2442 : * and continue with the next one in schedule; in particular, this
2443 : * happens if we are interrupted with SIGINT.
2444 : */
2445 46 : PG_TRY();
2446 : {
2447 : /* have at it */
2448 46 : MemoryContextSwitchTo(TopTransactionContext);
2449 46 : autovacuum_do_vac_analyze(tab, bstrategy);
2450 :
2451 : /*
2452 : * Clear a possible query-cancel signal, to avoid a late reaction
2453 : * to an automatically-sent signal because of vacuuming the
2454 : * current table (we're done with it, so it would make no sense to
2455 : * cancel at this point.)
2456 : */
2457 45 : QueryCancelPending = false;
2458 : }
2459 0 : PG_CATCH();
2460 : {
2461 : /*
2462 : * Abort the transaction, start a new one, and proceed with the
2463 : * next table in our list.
2464 : */
2465 0 : HOLD_INTERRUPTS();
2466 0 : if (tab->at_vacoptions & VACOPT_VACUUM)
2467 0 : errcontext("automatic vacuum of table \"%s.%s.%s\"",
2468 : tab->at_datname, tab->at_nspname, tab->at_relname);
2469 : else
2470 0 : errcontext("automatic analyze of table \"%s.%s.%s\"",
2471 : tab->at_datname, tab->at_nspname, tab->at_relname);
2472 0 : EmitErrorReport();
2473 :
2474 : /* this resets the PGXACT flags too */
2475 0 : AbortOutOfAnyTransaction();
2476 0 : FlushErrorState();
2477 0 : MemoryContextResetAndDeleteChildren(PortalContext);
2478 :
2479 : /* restart our transaction for the following operations */
2480 0 : StartTransactionCommand();
2481 0 : RESUME_INTERRUPTS();
2482 : }
2483 45 : PG_END_TRY();
2484 :
2485 45 : did_vacuum = true;
2486 :
2487 : /* the PGXACT flags are reset at the next end of transaction */
2488 :
2489 : /* be tidy */
2490 : deleted:
2491 45 : if (tab->at_datname != NULL)
2492 45 : pfree(tab->at_datname);
2493 45 : if (tab->at_nspname != NULL)
2494 45 : pfree(tab->at_nspname);
2495 45 : if (tab->at_relname != NULL)
2496 45 : pfree(tab->at_relname);
2497 45 : pfree(tab);
2498 :
2499 : /*
2500 : * Remove my info from shared memory. We could, but intentionally
2501 : * don't, clear wi_cost_limit and friends --- this is on the
2502 : * assumption that we probably have more to do with similar cost
2503 : * settings, so we don't want to give up our share of I/O for a very
2504 : * short interval and thereby thrash the global balance.
2505 : */
2506 45 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2507 45 : MyWorkerInfo->wi_tableoid = InvalidOid;
2508 45 : MyWorkerInfo->wi_sharedrel = false;
2509 45 : LWLockRelease(AutovacuumLock);
2510 :
2511 : /* restore vacuum cost GUCs for the next iteration */
2512 45 : VacuumCostDelay = stdVacuumCostDelay;
2513 45 : VacuumCostLimit = stdVacuumCostLimit;
2514 : }
2515 :
2516 : /*
2517 : * Perform additional work items, as requested by backends.
2518 : */
2519 3 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2520 771 : for (i = 0; i < NUM_WORKITEMS; i++)
2521 : {
2522 768 : AutoVacuumWorkItem *workitem = &AutoVacuumShmem->av_workItems[i];
2523 :
2524 768 : if (!workitem->avw_used)
2525 768 : continue;
2526 0 : if (workitem->avw_active)
2527 0 : continue;
2528 :
2529 : /* claim this one, and release lock while performing it */
2530 0 : workitem->avw_active = true;
2531 0 : LWLockRelease(AutovacuumLock);
2532 :
2533 0 : perform_work_item(workitem);
2534 :
2535 : /*
2536 : * Check for config changes before acquiring lock for further
2537 : * jobs.
2538 : */
2539 0 : CHECK_FOR_INTERRUPTS();
2540 0 : if (got_SIGHUP)
2541 : {
2542 0 : got_SIGHUP = false;
2543 0 : ProcessConfigFile(PGC_SIGHUP);
2544 : }
2545 :
2546 0 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2547 :
2548 : /* and mark it done */
2549 0 : workitem->avw_active = false;
2550 0 : workitem->avw_used = false;
2551 : }
2552 3 : LWLockRelease(AutovacuumLock);
2553 :
2554 : /*
2555 : * We leak table_toast_map here (among other things), but since we're
2556 : * going away soon, it's not a problem.
2557 : */
2558 :
2559 : /*
2560 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible. We
2561 : * only need to do this once, not after each table.
2562 : *
2563 : * Even if we didn't vacuum anything, it may still be important to do
2564 : * this, because one indirect effect of vac_update_datfrozenxid() is to
2565 : * update ShmemVariableCache->xidVacLimit. That might need to be done
2566 : * even if we haven't vacuumed anything, because relations with older
2567 : * relfrozenxid values or other databases with older datfrozenxid values
2568 : * might have been dropped, allowing xidVacLimit to advance.
2569 : *
2570 : * However, it's also important not to do this blindly in all cases,
2571 : * because when autovacuum=off this will restart the autovacuum launcher.
2572 : * If we're not careful, an infinite loop can result, where workers find
2573 : * no work to do and restart the launcher, which starts another worker in
2574 : * the same database that finds no work to do. To prevent that, we skip
2575 : * this if (1) we found no work to do and (2) we skipped at least one
2576 : * table due to concurrent autovacuum activity. In that case, the other
2577 : * worker has already done it, or will do so when it finishes.
2578 : */
2579 3 : if (did_vacuum || !found_concurrent_worker)
2580 3 : vac_update_datfrozenxid();
2581 :
2582 : /* Finally close out the last transaction. */
2583 3 : CommitTransactionCommand();
2584 3 : }
2585 :
2586 : /*
2587 : * Execute a previously registered work item.
2588 : */
2589 : static void
2590 0 : perform_work_item(AutoVacuumWorkItem *workitem)
2591 : {
2592 0 : char *cur_datname = NULL;
2593 0 : char *cur_nspname = NULL;
2594 0 : char *cur_relname = NULL;
2595 :
2596 : /*
2597 : * Note we do not store table info in MyWorkerInfo, since this is not
2598 : * vacuuming proper.
2599 : */
2600 :
2601 : /*
2602 : * Save the relation name for a possible error message, to avoid a catalog
2603 : * lookup in case of an error. If any of these return NULL, then the
2604 : * relation has been dropped since last we checked; skip it. Note: they
2605 : * must live in a long-lived memory context because we call vacuum and
2606 : * analyze in different transactions.
2607 : */
2608 :
2609 0 : cur_relname = get_rel_name(workitem->avw_relation);
2610 0 : cur_nspname = get_namespace_name(get_rel_namespace(workitem->avw_relation));
2611 0 : cur_datname = get_database_name(MyDatabaseId);
2612 0 : if (!cur_relname || !cur_nspname || !cur_datname)
2613 : goto deleted2;
2614 :
2615 0 : autovac_report_workitem(workitem, cur_nspname, cur_datname);
2616 :
2617 : /*
2618 : * We will abort the current work item if something errors out, and
2619 : * continue with the next one; in particular, this happens if we are
2620 : * interrupted with SIGINT. Note that this means that the work item list
2621 : * can be lossy.
2622 : */
2623 0 : PG_TRY();
2624 : {
2625 : /* have at it */
2626 0 : MemoryContextSwitchTo(TopTransactionContext);
2627 :
2628 0 : switch (workitem->avw_type)
2629 : {
2630 : case AVW_BRINSummarizeRange:
2631 0 : DirectFunctionCall2(brin_summarize_range,
2632 : ObjectIdGetDatum(workitem->avw_relation),
2633 : Int64GetDatum((int64) workitem->avw_blockNumber));
2634 0 : break;
2635 : default:
2636 0 : elog(WARNING, "unrecognized work item found: type %d",
2637 : workitem->avw_type);
2638 0 : break;
2639 : }
2640 :
2641 : /*
2642 : * Clear a possible query-cancel signal, to avoid a late reaction to
2643 : * an automatically-sent signal because of vacuuming the current table
2644 : * (we're done with it, so it would make no sense to cancel at this
2645 : * point.)
2646 : */
2647 0 : QueryCancelPending = false;
2648 : }
2649 0 : PG_CATCH();
2650 : {
2651 : /*
2652 : * Abort the transaction, start a new one, and proceed with the next
2653 : * table in our list.
2654 : */
2655 0 : HOLD_INTERRUPTS();
2656 0 : errcontext("processing work entry for relation \"%s.%s.%s\"",
2657 : cur_datname, cur_nspname, cur_relname);
2658 0 : EmitErrorReport();
2659 :
2660 : /* this resets the PGXACT flags too */
2661 0 : AbortOutOfAnyTransaction();
2662 0 : FlushErrorState();
2663 0 : MemoryContextResetAndDeleteChildren(PortalContext);
2664 :
2665 : /* restart our transaction for the following operations */
2666 0 : StartTransactionCommand();
2667 0 : RESUME_INTERRUPTS();
2668 : }
2669 0 : PG_END_TRY();
2670 :
2671 : /* We intentionally do not set did_vacuum here */
2672 :
2673 : /* be tidy */
2674 : deleted2:
2675 0 : if (cur_datname)
2676 0 : pfree(cur_datname);
2677 0 : if (cur_nspname)
2678 0 : pfree(cur_nspname);
2679 0 : if (cur_relname)
2680 0 : pfree(cur_relname);
2681 0 : }
2682 :
2683 : /*
2684 : * extract_autovac_opts
2685 : *
2686 : * Given a relation's pg_class tuple, return the AutoVacOpts portion of
2687 : * reloptions, if set; otherwise, return NULL.
2688 : */
2689 : static AutoVacOpts *
2690 996 : extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
2691 : {
2692 : bytea *relopts;
2693 : AutoVacOpts *av;
2694 :
2695 996 : Assert(((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_RELATION ||
2696 : ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
2697 : ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
2698 :
2699 996 : relopts = extractRelOptions(tup, pg_class_desc, NULL);
2700 996 : if (relopts == NULL)
2701 992 : return NULL;
2702 :
2703 4 : av = palloc(sizeof(AutoVacOpts));
2704 4 : memcpy(av, &(((StdRdOptions *) relopts)->autovacuum), sizeof(AutoVacOpts));
2705 4 : pfree(relopts);
2706 :
2707 4 : return av;
2708 : }
2709 :
2710 : /*
2711 : * get_pgstat_tabentry_relid
2712 : *
2713 : * Fetch the pgstat entry of a table, either local to a database or shared.
2714 : */
2715 : static PgStat_StatTabEntry *
2716 996 : get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared,
2717 : PgStat_StatDBEntry *dbentry)
2718 : {
2719 996 : PgStat_StatTabEntry *tabentry = NULL;
2720 :
2721 996 : if (isshared)
2722 : {
2723 61 : if (PointerIsValid(shared))
2724 61 : tabentry = hash_search(shared->tables, &relid,
2725 : HASH_FIND, NULL);
2726 : }
2727 935 : else if (PointerIsValid(dbentry))
2728 935 : tabentry = hash_search(dbentry->tables, &relid,
2729 : HASH_FIND, NULL);
2730 :
2731 996 : return tabentry;
2732 : }
2733 :
2734 : /*
2735 : * table_recheck_autovac
2736 : *
2737 : * Recheck whether a table still needs vacuum or analyze. Return value is a
2738 : * valid autovac_table pointer if it does, NULL otherwise.
2739 : *
2740 : * Note that the returned autovac_table does not have the name fields set.
2741 : */
2742 : static autovac_table *
2743 47 : table_recheck_autovac(Oid relid, HTAB *table_toast_map,
2744 : TupleDesc pg_class_desc,
2745 : int effective_multixact_freeze_max_age)
2746 : {
2747 : Form_pg_class classForm;
2748 : HeapTuple classTup;
2749 : bool dovacuum;
2750 : bool doanalyze;
2751 47 : autovac_table *tab = NULL;
2752 : PgStat_StatTabEntry *tabentry;
2753 : PgStat_StatDBEntry *shared;
2754 : PgStat_StatDBEntry *dbentry;
2755 : bool wraparound;
2756 : AutoVacOpts *avopts;
2757 :
2758 : /* use fresh stats */
2759 47 : autovac_refresh_stats();
2760 :
2761 47 : shared = pgstat_fetch_stat_dbentry(InvalidOid);
2762 47 : dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
2763 :
2764 : /* fetch the relation's relcache entry */
2765 47 : classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2766 47 : if (!HeapTupleIsValid(classTup))
2767 0 : return NULL;
2768 47 : classForm = (Form_pg_class) GETSTRUCT(classTup);
2769 :
2770 : /*
2771 : * Get the applicable reloptions. If it is a TOAST table, try to get the
2772 : * main table reloptions if the toast table itself doesn't have.
2773 : */
2774 47 : avopts = extract_autovac_opts(classTup, pg_class_desc);
2775 47 : if (classForm->relkind == RELKIND_TOASTVALUE &&
2776 0 : avopts == NULL && table_toast_map != NULL)
2777 : {
2778 : av_relation *hentry;
2779 : bool found;
2780 :
2781 0 : hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found);
2782 0 : if (found && hentry->ar_hasrelopts)
2783 0 : avopts = &hentry->ar_reloptions;
2784 : }
2785 :
2786 : /* fetch the pgstat table entry */
2787 47 : tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2788 : shared, dbentry);
2789 :
2790 47 : relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
2791 : effective_multixact_freeze_max_age,
2792 : &dovacuum, &doanalyze, &wraparound);
2793 :
2794 : /* ignore ANALYZE for toast tables */
2795 47 : if (classForm->relkind == RELKIND_TOASTVALUE)
2796 0 : doanalyze = false;
2797 :
2798 : /* OK, it needs something done */
2799 47 : if (doanalyze || dovacuum)
2800 : {
2801 : int freeze_min_age;
2802 : int freeze_table_age;
2803 : int multixact_freeze_min_age;
2804 : int multixact_freeze_table_age;
2805 : int vac_cost_limit;
2806 : int vac_cost_delay;
2807 : int log_min_duration;
2808 :
2809 : /*
2810 : * Calculate the vacuum cost parameters and the freeze ages. If there
2811 : * are options set in pg_class.reloptions, use them; in the case of a
2812 : * toast table, try the main table too. Otherwise use the GUC
2813 : * defaults, autovacuum's own first and plain vacuum second.
2814 : */
2815 :
2816 : /* -1 in autovac setting means use plain vacuum_cost_delay */
2817 46 : vac_cost_delay = (avopts && avopts->vacuum_cost_delay >= 0)
2818 : ? avopts->vacuum_cost_delay
2819 92 : : (autovacuum_vac_cost_delay >= 0)
2820 : ? autovacuum_vac_cost_delay
2821 46 : : VacuumCostDelay;
2822 :
2823 : /* 0 or -1 in autovac setting means use plain vacuum_cost_limit */
2824 46 : vac_cost_limit = (avopts && avopts->vacuum_cost_limit > 0)
2825 : ? avopts->vacuum_cost_limit
2826 92 : : (autovacuum_vac_cost_limit > 0)
2827 : ? autovacuum_vac_cost_limit
2828 46 : : VacuumCostLimit;
2829 :
2830 : /* -1 in autovac setting means use log_autovacuum_min_duration */
2831 46 : log_min_duration = (avopts && avopts->log_min_duration >= 0)
2832 : ? avopts->log_min_duration
2833 46 : : Log_autovacuum_min_duration;
2834 :
2835 : /* these do not have autovacuum-specific settings */
2836 46 : freeze_min_age = (avopts && avopts->freeze_min_age >= 0)
2837 : ? avopts->freeze_min_age
2838 46 : : default_freeze_min_age;
2839 :
2840 46 : freeze_table_age = (avopts && avopts->freeze_table_age >= 0)
2841 : ? avopts->freeze_table_age
2842 46 : : default_freeze_table_age;
2843 :
2844 46 : multixact_freeze_min_age = (avopts &&
2845 0 : avopts->multixact_freeze_min_age >= 0)
2846 : ? avopts->multixact_freeze_min_age
2847 46 : : default_multixact_freeze_min_age;
2848 :
2849 46 : multixact_freeze_table_age = (avopts &&
2850 0 : avopts->multixact_freeze_table_age >= 0)
2851 : ? avopts->multixact_freeze_table_age
2852 46 : : default_multixact_freeze_table_age;
2853 :
2854 46 : tab = palloc(sizeof(autovac_table));
2855 46 : tab->at_relid = relid;
2856 46 : tab->at_sharedrel = classForm->relisshared;
2857 92 : tab->at_vacoptions = VACOPT_SKIPTOAST |
2858 92 : (dovacuum ? VACOPT_VACUUM : 0) |
2859 92 : (doanalyze ? VACOPT_ANALYZE : 0) |
2860 46 : (!wraparound ? VACOPT_NOWAIT : 0);
2861 46 : tab->at_params.freeze_min_age = freeze_min_age;
2862 46 : tab->at_params.freeze_table_age = freeze_table_age;
2863 46 : tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age;
2864 46 : tab->at_params.multixact_freeze_table_age = multixact_freeze_table_age;
2865 46 : tab->at_params.is_wraparound = wraparound;
2866 46 : tab->at_params.log_min_duration = log_min_duration;
2867 46 : tab->at_vacuum_cost_limit = vac_cost_limit;
2868 46 : tab->at_vacuum_cost_delay = vac_cost_delay;
2869 46 : tab->at_relname = NULL;
2870 46 : tab->at_nspname = NULL;
2871 46 : tab->at_datname = NULL;
2872 :
2873 : /*
2874 : * If any of the cost delay parameters has been set individually for
2875 : * this table, disable the balancing algorithm.
2876 : */
2877 46 : tab->at_dobalance =
2878 46 : !(avopts && (avopts->vacuum_cost_limit > 0 ||
2879 0 : avopts->vacuum_cost_delay > 0));
2880 : }
2881 :
2882 47 : heap_freetuple(classTup);
2883 :
2884 47 : return tab;
2885 : }
2886 :
2887 : /*
2888 : * relation_needs_vacanalyze
2889 : *
2890 : * Check whether a relation needs to be vacuumed or analyzed; return each into
2891 : * "dovacuum" and "doanalyze", respectively. Also return whether the vacuum is
2892 : * being forced because of Xid or multixact wraparound.
2893 : *
2894 : * relopts is a pointer to the AutoVacOpts options (either for itself in the
2895 : * case of a plain table, or for either itself or its parent table in the case
2896 : * of a TOAST table), NULL if none; tabentry is the pgstats entry, which can be
2897 : * NULL.
2898 : *
2899 : * A table needs to be vacuumed if the number of dead tuples exceeds a
2900 : * threshold. This threshold is calculated as
2901 : *
2902 : * threshold = vac_base_thresh + vac_scale_factor * reltuples
2903 : *
2904 : * For analyze, the analysis done is that the number of tuples inserted,
2905 : * deleted and updated since the last analyze exceeds a threshold calculated
2906 : * in the same fashion as above. Note that the collector actually stores
2907 : * the number of tuples (both live and dead) that there were as of the last
2908 : * analyze. This is asymmetric to the VACUUM case.
2909 : *
2910 : * We also force vacuum if the table's relfrozenxid is more than freeze_max_age
2911 : * transactions back, and if its relminmxid is more than
2912 : * multixact_freeze_max_age multixacts back.
2913 : *
2914 : * A table whose autovacuum_enabled option is false is
2915 : * automatically skipped (unless we have to vacuum it due to freeze_max_age).
2916 : * Thus autovacuum can be disabled for specific tables. Also, when the stats
2917 : * collector does not have data about a table, it will be skipped.
2918 : *
2919 : * A table whose vac_base_thresh value is < 0 takes the base value from the
2920 : * autovacuum_vacuum_threshold GUC variable. Similarly, a vac_scale_factor
2921 : * value < 0 is substituted with the value of
2922 : * autovacuum_vacuum_scale_factor GUC variable. Ditto for analyze.
2923 : */
2924 : static void
2925 996 : relation_needs_vacanalyze(Oid relid,
2926 : AutoVacOpts *relopts,
2927 : Form_pg_class classForm,
2928 : PgStat_StatTabEntry *tabentry,
2929 : int effective_multixact_freeze_max_age,
2930 : /* output params below */
2931 : bool *dovacuum,
2932 : bool *doanalyze,
2933 : bool *wraparound)
2934 : {
2935 : bool force_vacuum;
2936 : bool av_enabled;
2937 : float4 reltuples; /* pg_class.reltuples */
2938 :
2939 : /* constants from reloptions or GUC variables */
2940 : int vac_base_thresh,
2941 : anl_base_thresh;
2942 : float4 vac_scale_factor,
2943 : anl_scale_factor;
2944 :
2945 : /* thresholds calculated from above constants */
2946 : float4 vacthresh,
2947 : anlthresh;
2948 :
2949 : /* number of vacuum (resp. analyze) tuples at this time */
2950 : float4 vactuples,
2951 : anltuples;
2952 :
2953 : /* freeze parameters */
2954 : int freeze_max_age;
2955 : int multixact_freeze_max_age;
2956 : TransactionId xidForceLimit;
2957 : MultiXactId multiForceLimit;
2958 :
2959 996 : AssertArg(classForm != NULL);
2960 996 : AssertArg(OidIsValid(relid));
2961 :
2962 : /*
2963 : * Determine vacuum/analyze equation parameters. We have two possible
2964 : * sources: the passed reloptions (which could be a main table or a toast
2965 : * table), or the autovacuum GUC variables.
2966 : */
2967 :
2968 : /* -1 in autovac setting means use plain vacuum_cost_delay */
2969 996 : vac_scale_factor = (relopts && relopts->vacuum_scale_factor >= 0)
2970 0 : ? relopts->vacuum_scale_factor
2971 : : autovacuum_vac_scale;
2972 :
2973 1003 : vac_base_thresh = (relopts && relopts->vacuum_threshold >= 0)
2974 : ? relopts->vacuum_threshold
2975 996 : : autovacuum_vac_thresh;
2976 :
2977 996 : anl_scale_factor = (relopts && relopts->analyze_scale_factor >= 0)
2978 0 : ? relopts->analyze_scale_factor
2979 : : autovacuum_anl_scale;
2980 :
2981 1003 : anl_base_thresh = (relopts && relopts->analyze_threshold >= 0)
2982 : ? relopts->analyze_threshold
2983 996 : : autovacuum_anl_thresh;
2984 :
2985 1003 : freeze_max_age = (relopts && relopts->freeze_max_age >= 0)
2986 0 : ? Min(relopts->freeze_max_age, autovacuum_freeze_max_age)
2987 996 : : autovacuum_freeze_max_age;
2988 :
2989 1003 : multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0)
2990 0 : ? Min(relopts->multixact_freeze_max_age, effective_multixact_freeze_max_age)
2991 996 : : effective_multixact_freeze_max_age;
2992 :
2993 996 : av_enabled = (relopts ? relopts->enabled : true);
2994 :
2995 : /* Force vacuum if table is at risk of wraparound */
2996 996 : xidForceLimit = recentXid - freeze_max_age;
2997 996 : if (xidForceLimit < FirstNormalTransactionId)
2998 0 : xidForceLimit -= FirstNormalTransactionId;
2999 1992 : force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
3000 996 : TransactionIdPrecedes(classForm->relfrozenxid,
3001 : xidForceLimit));
3002 996 : if (!force_vacuum)
3003 : {
3004 996 : multiForceLimit = recentMulti - multixact_freeze_max_age;
3005 996 : if (multiForceLimit < FirstMultiXactId)
3006 0 : multiForceLimit -= FirstMultiXactId;
3007 996 : force_vacuum = MultiXactIdPrecedes(classForm->relminmxid,
3008 : multiForceLimit);
3009 : }
3010 996 : *wraparound = force_vacuum;
3011 :
3012 : /* User disabled it in pg_class.reloptions? (But ignore if at risk) */
3013 996 : if (!av_enabled && !force_vacuum)
3014 : {
3015 3 : *doanalyze = false;
3016 3 : *dovacuum = false;
3017 999 : return;
3018 : }
3019 :
3020 : /*
3021 : * If we found the table in the stats hash, and autovacuum is currently
3022 : * enabled, make a threshold-based decision whether to vacuum and/or
3023 : * analyze. If autovacuum is currently disabled, we must be here for
3024 : * anti-wraparound vacuuming only, so don't vacuum (or analyze) anything
3025 : * that's not being forced.
3026 : */
3027 993 : if (PointerIsValid(tabentry) && AutoVacuumingActive())
3028 : {
3029 765 : reltuples = classForm->reltuples;
3030 765 : vactuples = tabentry->n_dead_tuples;
3031 765 : anltuples = tabentry->changes_since_analyze;
3032 :
3033 765 : vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
3034 765 : anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
3035 :
3036 : /*
3037 : * Note that we don't need to take special consideration for stat
3038 : * reset, because if that happens, the last vacuum and analyze counts
3039 : * will be reset too.
3040 : */
3041 765 : elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
3042 : NameStr(classForm->relname),
3043 : vactuples, vacthresh, anltuples, anlthresh);
3044 :
3045 : /* Determine if this table needs vacuum or analyze. */
3046 765 : *dovacuum = force_vacuum || (vactuples > vacthresh);
3047 765 : *doanalyze = (anltuples > anlthresh);
3048 : }
3049 : else
3050 : {
3051 : /*
3052 : * Skip a table not found in stat hash, unless we have to force vacuum
3053 : * for anti-wrap purposes. If it's not acted upon, there's no need to
3054 : * vacuum it.
3055 : */
3056 228 : *dovacuum = force_vacuum;
3057 228 : *doanalyze = false;
3058 : }
3059 :
3060 : /* ANALYZE refuses to work with pg_statistic */
3061 993 : if (relid == StatisticRelationId)
3062 5 : *doanalyze = false;
3063 : }
3064 :
3065 : /*
3066 : * autovacuum_do_vac_analyze
3067 : * Vacuum and/or analyze the specified table
3068 : */
3069 : static void
3070 46 : autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy)
3071 : {
3072 : RangeVar rangevar;
3073 :
3074 : /* Set up command parameters --- use local variables instead of palloc */
3075 46 : MemSet(&rangevar, 0, sizeof(rangevar));
3076 :
3077 46 : rangevar.schemaname = tab->at_nspname;
3078 46 : rangevar.relname = tab->at_relname;
3079 46 : rangevar.location = -1;
3080 :
3081 : /* Let pgstat know what we're doing */
3082 46 : autovac_report_activity(tab);
3083 :
3084 46 : vacuum(tab->at_vacoptions, &rangevar, tab->at_relid, &tab->at_params, NIL,
3085 : bstrategy, true);
3086 45 : }
3087 :
3088 : /*
3089 : * autovac_report_activity
3090 : * Report to pgstat what autovacuum is doing
3091 : *
3092 : * We send a SQL string corresponding to what the user would see if the
3093 : * equivalent command was to be issued manually.
3094 : *
3095 : * Note we assume that we are going to report the next command as soon as we're
3096 : * done with the current one, and exit right after the last one, so we don't
3097 : * bother to report "<IDLE>" or some such.
3098 : */
3099 : static void
3100 46 : autovac_report_activity(autovac_table *tab)
3101 : {
3102 : #define MAX_AUTOVAC_ACTIV_LEN (NAMEDATALEN * 2 + 56)
3103 : char activity[MAX_AUTOVAC_ACTIV_LEN];
3104 : int len;
3105 :
3106 : /* Report the command and possible options */
3107 46 : if (tab->at_vacoptions & VACOPT_VACUUM)
3108 7 : snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3109 : "autovacuum: VACUUM%s",
3110 7 : tab->at_vacoptions & VACOPT_ANALYZE ? " ANALYZE" : "");
3111 : else
3112 39 : snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3113 : "autovacuum: ANALYZE");
3114 :
3115 : /*
3116 : * Report the qualified name of the relation.
3117 : */
3118 46 : len = strlen(activity);
3119 :
3120 46 : snprintf(activity + len, MAX_AUTOVAC_ACTIV_LEN - len,
3121 : " %s.%s%s", tab->at_nspname, tab->at_relname,
3122 46 : tab->at_params.is_wraparound ? " (to prevent wraparound)" : "");
3123 :
3124 : /* Set statement_timestamp() to current time for pg_stat_activity */
3125 46 : SetCurrentStatementStartTimestamp();
3126 :
3127 46 : pgstat_report_activity(STATE_RUNNING, activity);
3128 46 : }
3129 :
3130 : /*
3131 : * autovac_report_workitem
3132 : * Report to pgstat that autovacuum is processing a work item
3133 : */
3134 : static void
3135 0 : autovac_report_workitem(AutoVacuumWorkItem *workitem,
3136 : const char *nspname, const char *relname)
3137 : {
3138 : char activity[MAX_AUTOVAC_ACTIV_LEN + 12 + 2];
3139 : char blk[12 + 2];
3140 : int len;
3141 :
3142 0 : switch (workitem->avw_type)
3143 : {
3144 : case AVW_BRINSummarizeRange:
3145 0 : snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3146 : "autovacuum: BRIN summarize");
3147 0 : break;
3148 : }
3149 :
3150 : /*
3151 : * Report the qualified name of the relation, and the block number if any
3152 : */
3153 0 : len = strlen(activity);
3154 :
3155 0 : if (BlockNumberIsValid(workitem->avw_blockNumber))
3156 0 : snprintf(blk, sizeof(blk), " %u", workitem->avw_blockNumber);
3157 : else
3158 0 : blk[0] = '\0';
3159 :
3160 0 : snprintf(activity + len, MAX_AUTOVAC_ACTIV_LEN - len,
3161 : " %s.%s%s", nspname, relname, blk);
3162 :
3163 : /* Set statement_timestamp() to current time for pg_stat_activity */
3164 0 : SetCurrentStatementStartTimestamp();
3165 :
3166 0 : pgstat_report_activity(STATE_RUNNING, activity);
3167 0 : }
3168 :
3169 : /*
3170 : * AutoVacuumingActive
3171 : * Check GUC vars and report whether the autovacuum process should be
3172 : * running.
3173 : */
3174 : bool
3175 773 : AutoVacuumingActive(void)
3176 : {
3177 773 : if (!autovacuum_start_daemon || !pgstat_track_counts)
3178 0 : return false;
3179 773 : return true;
3180 : }
3181 :
3182 : /*
3183 : * Request one work item to the next autovacuum run processing our database.
3184 : */
3185 : void
3186 0 : AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId,
3187 : BlockNumber blkno)
3188 : {
3189 : int i;
3190 :
3191 0 : LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
3192 :
3193 : /*
3194 : * Locate an unused work item and fill it with the given data.
3195 : */
3196 0 : for (i = 0; i < NUM_WORKITEMS; i++)
3197 : {
3198 0 : AutoVacuumWorkItem *workitem = &AutoVacuumShmem->av_workItems[i];
3199 :
3200 0 : if (workitem->avw_used)
3201 0 : continue;
3202 :
3203 0 : workitem->avw_used = true;
3204 0 : workitem->avw_active = false;
3205 0 : workitem->avw_type = type;
3206 0 : workitem->avw_database = MyDatabaseId;
3207 0 : workitem->avw_relation = relationId;
3208 0 : workitem->avw_blockNumber = blkno;
3209 :
3210 : /* done */
3211 0 : break;
3212 : }
3213 :
3214 0 : LWLockRelease(AutovacuumLock);
3215 0 : }
3216 :
3217 : /*
3218 : * autovac_init
3219 : * This is called at postmaster initialization.
3220 : *
3221 : * All we do here is annoy the user if he got it wrong.
3222 : */
3223 : void
3224 1 : autovac_init(void)
3225 : {
3226 1 : if (autovacuum_start_daemon && !pgstat_track_counts)
3227 0 : ereport(WARNING,
3228 : (errmsg("autovacuum not started because of misconfiguration"),
3229 : errhint("Enable the \"track_counts\" option.")));
3230 1 : }
3231 :
3232 : /*
3233 : * IsAutoVacuum functions
3234 : * Return whether this is either a launcher autovacuum process or a worker
3235 : * process.
3236 : */
3237 : bool
3238 1959 : IsAutoVacuumLauncherProcess(void)
3239 : {
3240 1959 : return am_autovacuum_launcher;
3241 : }
3242 :
3243 : bool
3244 4029 : IsAutoVacuumWorkerProcess(void)
3245 : {
3246 4029 : return am_autovacuum_worker;
3247 : }
3248 :
3249 :
3250 : /*
3251 : * AutoVacuumShmemSize
3252 : * Compute space needed for autovacuum-related shared memory
3253 : */
3254 : Size
3255 10 : AutoVacuumShmemSize(void)
3256 : {
3257 : Size size;
3258 :
3259 : /*
3260 : * Need the fixed struct and the array of WorkerInfoData.
3261 : */
3262 10 : size = sizeof(AutoVacuumShmemStruct);
3263 10 : size = MAXALIGN(size);
3264 10 : size = add_size(size, mul_size(autovacuum_max_workers,
3265 : sizeof(WorkerInfoData)));
3266 10 : return size;
3267 : }
3268 :
3269 : /*
3270 : * AutoVacuumShmemInit
3271 : * Allocate and initialize autovacuum-related shared memory
3272 : */
3273 : void
3274 5 : AutoVacuumShmemInit(void)
3275 : {
3276 : bool found;
3277 :
3278 5 : AutoVacuumShmem = (AutoVacuumShmemStruct *)
3279 5 : ShmemInitStruct("AutoVacuum Data",
3280 : AutoVacuumShmemSize(),
3281 : &found);
3282 :
3283 5 : if (!IsUnderPostmaster)
3284 : {
3285 : WorkerInfo worker;
3286 : int i;
3287 :
3288 5 : Assert(!found);
3289 :
3290 5 : AutoVacuumShmem->av_launcherpid = 0;
3291 5 : dlist_init(&AutoVacuumShmem->av_freeWorkers);
3292 5 : dlist_init(&AutoVacuumShmem->av_runningWorkers);
3293 5 : AutoVacuumShmem->av_startingWorker = NULL;
3294 5 : memset(AutoVacuumShmem->av_workItems, 0,
3295 : sizeof(AutoVacuumWorkItem) * NUM_WORKITEMS);
3296 :
3297 5 : worker = (WorkerInfo) ((char *) AutoVacuumShmem +
3298 : MAXALIGN(sizeof(AutoVacuumShmemStruct)));
3299 :
3300 : /* initialize the WorkerInfo free list */
3301 20 : for (i = 0; i < autovacuum_max_workers; i++)
3302 15 : dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
3303 15 : &worker[i].wi_links);
3304 : }
3305 : else
3306 0 : Assert(found);
3307 5 : }
3308 :
3309 : /*
3310 : * autovac_refresh_stats
3311 : * Refresh pgstats data for an autovacuum process
3312 : *
3313 : * Cause the next pgstats read operation to obtain fresh data, but throttle
3314 : * such refreshing in the autovacuum launcher. This is mostly to avoid
3315 : * rereading the pgstats files too many times in quick succession when there
3316 : * are many databases.
3317 : *
3318 : * Note: we avoid throttling in the autovac worker, as it would be
3319 : * counterproductive in the recheck logic.
3320 : */
3321 : static void
3322 54 : autovac_refresh_stats(void)
3323 : {
3324 54 : if (IsAutoVacuumLauncherProcess())
3325 : {
3326 : static TimestampTz last_read = 0;
3327 : TimestampTz current_time;
3328 :
3329 7 : current_time = GetCurrentTimestamp();
3330 :
3331 7 : if (!TimestampDifferenceExceeds(last_read, current_time,
3332 : STATS_READ_DELAY))
3333 56 : return;
3334 :
3335 5 : last_read = current_time;
3336 : }
3337 :
3338 52 : pgstat_clear_snapshot();
3339 : }
|