Line data Source code
1 : /* ----------
2 : * pgstat.c
3 : *
4 : * All the statistics collector stuff hacked up in one big, ugly file.
5 : *
6 : * TODO: - Separate collector, postmaster and backend stuff
7 : * into different files.
8 : *
9 : * - Add some automatic call for pgstat vacuuming.
10 : *
11 : * - Add a pgstat config column to pg_database, so this
12 : * entire thing can be enabled/disabled on a per db basis.
13 : *
14 : * Copyright (c) 2001-2017, PostgreSQL Global Development Group
15 : *
16 : * src/backend/postmaster/pgstat.c
17 : * ----------
18 : */
19 : #include "postgres.h"
20 :
21 : #include <unistd.h>
22 : #include <fcntl.h>
23 : #include <sys/param.h>
24 : #include <sys/time.h>
25 : #include <sys/socket.h>
26 : #include <netdb.h>
27 : #include <netinet/in.h>
28 : #include <arpa/inet.h>
29 : #include <signal.h>
30 : #include <time.h>
31 : #ifdef HAVE_SYS_SELECT_H
32 : #include <sys/select.h>
33 : #endif
34 :
35 : #include "pgstat.h"
36 :
37 : #include "access/heapam.h"
38 : #include "access/htup_details.h"
39 : #include "access/transam.h"
40 : #include "access/twophase_rmgr.h"
41 : #include "access/xact.h"
42 : #include "catalog/pg_database.h"
43 : #include "catalog/pg_proc.h"
44 : #include "common/ip.h"
45 : #include "libpq/libpq.h"
46 : #include "libpq/pqsignal.h"
47 : #include "mb/pg_wchar.h"
48 : #include "miscadmin.h"
49 : #include "pg_trace.h"
50 : #include "postmaster/autovacuum.h"
51 : #include "postmaster/fork_process.h"
52 : #include "postmaster/postmaster.h"
53 : #include "replication/walsender.h"
54 : #include "storage/backendid.h"
55 : #include "storage/dsm.h"
56 : #include "storage/fd.h"
57 : #include "storage/ipc.h"
58 : #include "storage/latch.h"
59 : #include "storage/lmgr.h"
60 : #include "storage/pg_shmem.h"
61 : #include "storage/procsignal.h"
62 : #include "storage/sinvaladt.h"
63 : #include "utils/ascii.h"
64 : #include "utils/guc.h"
65 : #include "utils/memutils.h"
66 : #include "utils/ps_status.h"
67 : #include "utils/rel.h"
68 : #include "utils/snapmgr.h"
69 : #include "utils/timestamp.h"
70 : #include "utils/tqual.h"
71 :
72 :
/* ----------
 * Timer definitions.
 *
 * All values are in milliseconds unless stated otherwise.
 * ----------
 */

/* Minimum time between stats file updates. */
#define PGSTAT_STAT_INTERVAL	500

/* How long to wait between checks for a new file. */
#define PGSTAT_RETRY_DELAY		10

/* Maximum time to wait for a stats file update. */
#define PGSTAT_MAX_WAIT_TIME	10000

/* How often to ping the collector for a new file. */
#define PGSTAT_INQ_INTERVAL		640

/* How often to attempt to restart a failed statistics collector; in seconds. */
#define PGSTAT_RESTART_INTERVAL 60

/* The wait and inquiry intervals expressed as numbers of retry delays. */
#define PGSTAT_POLL_LOOP_COUNT	(PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
#define PGSTAT_INQ_LOOP_COUNT	(PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)

/* Minimum receive buffer size for the collector's socket, in bytes. */
#define PGSTAT_MIN_RCVBUF		(100 * 1024)
98 :
99 :
/* ----------
 * Initial size hints for the hash tables used in the collector.
 * ----------
 */
#define PGSTAT_DB_HASH_SIZE			16
#define PGSTAT_TAB_HASH_SIZE		512
#define PGSTAT_FUNCTION_HASH_SIZE	512


/* ----------
 * Total number of backend status slots, auxiliary processes included.
 *
 * One slot is reserved per possible BackendId and one more per auxiliary
 * process type, which assumes that at most one process of each auxiliary
 * type exists at a time.  MaxBackends already counts autovacuum workers
 * and background workers.
 * ----------
 */
#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES)
119 :
120 :
121 : /* ----------
122 : * GUC parameters
123 : * ----------
124 : */
125 : bool pgstat_track_activities = false;
126 : bool pgstat_track_counts = false;
127 : int pgstat_track_functions = TRACK_FUNC_OFF;
128 : int pgstat_track_activity_query_size = 1024;
129 :
130 : /* ----------
131 : * Built from GUC parameter
132 : * ----------
133 : */
134 : char *pgstat_stat_directory = NULL;
135 : char *pgstat_stat_filename = NULL;
136 : char *pgstat_stat_tmpname = NULL;
137 :
138 : /*
139 : * BgWriter global statistics counters (unused in other processes).
140 : * Stored directly in a stats message structure so it can be sent
141 : * without needing to copy things around. We assume this inits to zeroes.
142 : */
143 : PgStat_MsgBgWriter BgWriterStats;
144 :
145 : /* ----------
146 : * Local data
147 : * ----------
148 : */
149 : NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
150 :
151 : static struct sockaddr_storage pgStatAddr;
152 :
153 : static time_t last_pgstat_start_time;
154 :
155 : static bool pgStatRunningInCollector = false;
156 :
157 : /*
158 : * Structures in which backends store per-table info that's waiting to be
159 : * sent to the collector.
160 : *
161 : * NOTE: once allocated, TabStatusArray structures are never moved or deleted
162 : * for the life of the backend. Also, we zero out the t_id fields of the
163 : * contained PgStat_TableStatus structs whenever they are not actively in use.
164 : * This allows relcache pgstat_info pointers to be treated as long-lived data,
165 : * avoiding repeated searches in pgstat_initstats() when a relation is
166 : * repeatedly opened during a transaction.
167 : */
168 : #define TABSTAT_QUANTUM 100 /* we alloc this many at a time */
169 :
170 : typedef struct TabStatusArray
171 : {
172 : struct TabStatusArray *tsa_next; /* link to next array, if any */
173 : int tsa_used; /* # entries currently used */
174 : PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM]; /* per-table data */
175 : } TabStatusArray;
176 :
177 : static TabStatusArray *pgStatTabList = NULL;
178 :
179 : /*
180 : * pgStatTabHash entry: map from relation OID to PgStat_TableStatus pointer
181 : */
182 : typedef struct TabStatHashEntry
183 : {
184 : Oid t_id;
185 : PgStat_TableStatus *tsa_entry;
186 : } TabStatHashEntry;
187 :
188 : /*
189 : * Hash table for O(1) t_id -> tsa_entry lookup
190 : */
191 : static HTAB *pgStatTabHash = NULL;
192 :
193 : /*
194 : * Backends store per-function info that's waiting to be sent to the collector
195 : * in this hash table (indexed by function OID).
196 : */
197 : static HTAB *pgStatFunctions = NULL;
198 :
199 : /*
200 : * Indicates if backend has some function stats that it hasn't yet
201 : * sent to the collector.
202 : */
203 : static bool have_function_stats = false;
204 :
205 : /*
206 : * Tuple insertion/deletion counts for an open transaction can't be propagated
207 : * into PgStat_TableStatus counters until we know if it is going to commit
208 : * or abort. Hence, we keep these counts in per-subxact structs that live
209 : * in TopTransactionContext. This data structure is designed on the assumption
210 : * that subxacts won't usually modify very many tables.
211 : */
212 : typedef struct PgStat_SubXactStatus
213 : {
214 : int nest_level; /* subtransaction nest level */
215 : struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
216 : PgStat_TableXactStatus *first; /* head of list for this subxact */
217 : } PgStat_SubXactStatus;
218 :
219 : static PgStat_SubXactStatus *pgStatXactStack = NULL;
220 :
221 : static int pgStatXactCommit = 0;
222 : static int pgStatXactRollback = 0;
223 : PgStat_Counter pgStatBlockReadTime = 0;
224 : PgStat_Counter pgStatBlockWriteTime = 0;
225 :
226 : /* Record that's written to 2PC state file when pgstat state is persisted */
227 : typedef struct TwoPhasePgStatRecord
228 : {
229 : PgStat_Counter tuples_inserted; /* tuples inserted in xact */
230 : PgStat_Counter tuples_updated; /* tuples updated in xact */
231 : PgStat_Counter tuples_deleted; /* tuples deleted in xact */
232 : PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */
233 : PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */
234 : PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */
235 : Oid t_id; /* table's OID */
236 : bool t_shared; /* is it a shared catalog? */
237 : bool t_truncated; /* was the relation truncated? */
238 : } TwoPhasePgStatRecord;
239 :
240 : /*
241 : * Info about current "snapshot" of stats file
242 : */
243 : static MemoryContext pgStatLocalContext = NULL;
244 : static HTAB *pgStatDBHash = NULL;
245 :
246 : /* Status for backends including auxiliary */
247 : static LocalPgBackendStatus *localBackendStatusTable = NULL;
248 :
249 : /* Total number of backends including auxiliary */
250 : static int localNumBackends = 0;
251 :
252 : /*
253 : * Cluster wide statistics, kept in the stats collector.
254 : * Contains statistics that are not collected per database
255 : * or per table.
256 : */
257 : static PgStat_ArchiverStats archiverStats;
258 : static PgStat_GlobalStats globalStats;
259 :
260 : /*
261 : * List of OIDs of databases we need to write out. If an entry is InvalidOid,
262 : * it means to write only the shared-catalog stats ("DB 0"); otherwise, we
263 : * will write both that DB's data and the shared stats.
264 : */
265 : static List *pending_write_requests = NIL;
266 :
/*
 * Signal handler flags.
 *
 * These are set asynchronously from signal handlers and polled by the main
 * code, so they are declared "volatile sig_atomic_t": that is the only
 * object type the C standard guarantees can be safely written from a
 * signal handler.  They are still used as plain true/false values.
 */
static volatile sig_atomic_t need_exit = false;
static volatile sig_atomic_t got_SIGHUP = false;
270 :
271 : /*
272 : * Total time charged to functions so far in the current backend.
273 : * We use this to help separate "self" and "other" time charges.
274 : * (We assume this initializes to zero.)
275 : */
276 : static instr_time total_func_time;
277 :
278 :
279 : /* ----------
280 : * Local function forward declarations
281 : * ----------
282 : */
283 : #ifdef EXEC_BACKEND
284 : static pid_t pgstat_forkexec(void);
285 : #endif
286 :
287 : NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
288 : static void pgstat_exit(SIGNAL_ARGS);
289 : static void pgstat_beshutdown_hook(int code, Datum arg);
290 : static void pgstat_sighup_handler(SIGNAL_ARGS);
291 :
292 : static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
293 : static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
294 : Oid tableoid, bool create);
295 : static void pgstat_write_statsfiles(bool permanent, bool allDbs);
296 : static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent);
297 : static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep);
298 : static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent);
299 : static void backend_read_statsfile(void);
300 : static void pgstat_read_current_status(void);
301 :
302 : static bool pgstat_write_statsfile_needed(void);
303 : static bool pgstat_db_requested(Oid databaseid);
304 :
305 : static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
306 : static void pgstat_send_funcstats(void);
307 : static HTAB *pgstat_collect_oids(Oid catalogid);
308 :
309 : static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
310 :
311 : static void pgstat_setup_memcxt(void);
312 :
313 : static const char *pgstat_get_wait_activity(WaitEventActivity w);
314 : static const char *pgstat_get_wait_client(WaitEventClient w);
315 : static const char *pgstat_get_wait_ipc(WaitEventIPC w);
316 : static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
317 : static const char *pgstat_get_wait_io(WaitEventIO w);
318 :
319 : static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
320 : static void pgstat_send(void *msg, int len);
321 :
322 : static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);
323 : static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
324 : static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
325 : static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
326 : static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
327 : static void pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len);
328 : static void pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len);
329 : static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
330 : static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
331 : static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
332 : static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
333 : static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
334 : static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
335 : static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
336 : static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
337 : static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
338 : static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
339 :
340 : /* ------------------------------------------------------------
341 : * Public functions called from postmaster follow
342 : * ------------------------------------------------------------
343 : */
344 :
/* Value of the one-byte self-test datagram used while setting up the socket */
#define TESTBYTEVAL ((char) 199)

/*
 * Subroutine for pgstat_init: try to set up pgStatSock on one address.
 *
 * Creates a UDP socket, binds it to "addr", connects it to its own bound
 * address, and round-trips a one-byte test message through it.
 *
 * Returns true with pgStatSock set to the working socket.  On any failure a
 * LOG message is emitted, the socket (if one was created) is closed,
 * pgStatSock is reset to PGINVALID_SOCKET, and false is returned.
 */
static bool
pgstat_try_address(const struct addrinfo *addr)
{
	ACCEPT_TYPE_ARG3 alen;
	fd_set		rset;
	struct timeval tv;
	char		test_byte;
	int			sel_res;
	int			rc;

	/*
	 * Create the socket.
	 */
	pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0);
	if (pgStatSock == PGINVALID_SOCKET)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not create socket for statistics collector: %m")));
		return false;
	}

	/*
	 * Bind it to a kernel assigned port on localhost and get the assigned
	 * port via getsockname().
	 */
	if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not bind socket for statistics collector: %m")));
		goto fail;
	}

	alen = sizeof(pgStatAddr);
	if (getsockname(pgStatSock, (struct sockaddr *) &pgStatAddr, &alen) < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not get address of socket for statistics collector: %m")));
		goto fail;
	}

	/*
	 * Connect the socket to its own address.  This saves a few cycles by not
	 * having to respecify the target address on every send.  This also
	 * provides a kernel-level check that only packets from this same address
	 * will be received.
	 */
	if (connect(pgStatSock, (struct sockaddr *) &pgStatAddr, alen) < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not connect socket for statistics collector: %m")));
		goto fail;
	}

	/*
	 * Try to send and receive a one-byte test message on the socket.  This is
	 * to catch situations where the socket can be created but will not
	 * actually pass data (for instance, because kernel packet filtering rules
	 * prevent it).
	 */
	test_byte = TESTBYTEVAL;

	do
	{
		rc = send(pgStatSock, &test_byte, 1, 0);
	} while (rc != 1 && errno == EINTR);	/* if interrupted, just retry */

	if (rc != 1)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not send test message on socket for statistics collector: %m")));
		goto fail;
	}

	/*
	 * There could possibly be a little delay before the message can be
	 * received.  We arbitrarily allow up to half a second before deciding
	 * it's broken.
	 */
	for (;;)					/* need a loop to handle EINTR */
	{
		FD_ZERO(&rset);
		FD_SET(pgStatSock, &rset);

		tv.tv_sec = 0;
		tv.tv_usec = 500000;
		sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
		if (sel_res >= 0 || errno != EINTR)
			break;
	}
	if (sel_res < 0)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("select() failed in statistics collector: %m")));
		goto fail;
	}
	if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
	{
		/*
		 * This is the case we actually think is likely, so take pains to give
		 * a specific message for it.
		 *
		 * errno will not be set meaningfully here, so don't use it.
		 */
		ereport(LOG,
				(errcode(ERRCODE_CONNECTION_FAILURE),
				 errmsg("test message did not get through on socket for statistics collector")));
		goto fail;
	}

	test_byte++;				/* just make sure variable is changed */

	do
	{
		rc = recv(pgStatSock, &test_byte, 1, 0);
	} while (rc != 1 && errno == EINTR);	/* if interrupted, just retry */

	if (rc != 1)
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not receive test message on socket for statistics collector: %m")));
		goto fail;
	}

	if (test_byte != TESTBYTEVAL)	/* strictly paranoia ... */
	{
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("incorrect test message transmission on socket for statistics collector")));
		goto fail;
	}

	/* If we get here, we have a working socket */
	return true;

fail:
	closesocket(pgStatSock);
	pgStatSock = PGINVALID_SOCKET;
	return false;
}

/*
 * Subroutine for pgstat_init: enlarge the receive buffer of pgStatSock.
 *
 * Try to ensure that the socket's receive buffer is at least
 * PGSTAT_MIN_RCVBUF bytes, so that it won't easily overflow and lose data.
 * Use of UDP protocol means that we are willing to lose data under heavy
 * load, but we don't want it to happen just because of ridiculously small
 * default buffer sizes (such as 8KB on older Windows versions).
 *
 * Failures are only logged; the socket remains usable either way.
 */
static void
pgstat_ensure_min_rcvbuf(void)
{
	int			old_rcvbuf;
	int			new_rcvbuf;
	ACCEPT_TYPE_ARG3 rcvbufsize = sizeof(old_rcvbuf);

	if (getsockopt(pgStatSock, SOL_SOCKET, SO_RCVBUF,
				   (char *) &old_rcvbuf, &rcvbufsize) < 0)
	{
		elog(LOG, "getsockopt(SO_RCVBUF) failed: %m");
		/* if we can't get existing size, always try to set it */
		old_rcvbuf = 0;
	}

	new_rcvbuf = PGSTAT_MIN_RCVBUF;
	if (old_rcvbuf < new_rcvbuf)
	{
		if (setsockopt(pgStatSock, SOL_SOCKET, SO_RCVBUF,
					   (char *) &new_rcvbuf, sizeof(new_rcvbuf)) < 0)
			elog(LOG, "setsockopt(SO_RCVBUF) failed: %m");
	}
}

/* ----------
 * pgstat_init() -
 *
 *	Called from postmaster at startup. Create the resources required
 *	by the statistics collector process.  If unable to do so, do not
 *	fail --- better to let the postmaster start with stats collection
 *	disabled.
 *
 *	On success pgStatSock is a connected, non-blocking UDP socket bound to
 *	a localhost address.  On failure pgStatSock is PGINVALID_SOCKET and
 *	the track_counts GUC has been forced off.
 * ----------
 */
void
pgstat_init(void)
{
	struct addrinfo *addrs = NULL,
			   *addr,
				hints;
	int			ret;
	int			tries = 0;

	/*
	 * This static assertion verifies that we didn't mess up the calculations
	 * involved in selecting maximum payload sizes for our UDP messages.
	 * Because the only consequence of overrunning PGSTAT_MAX_MSG_SIZE would
	 * be silent performance loss from fragmentation, it seems worth having a
	 * compile-time cross-check that we didn't.
	 */
	StaticAssertStmt(sizeof(PgStat_Msg) <= PGSTAT_MAX_MSG_SIZE,
					 "maximum stats message size exceeds PGSTAT_MAX_MSG_SIZE");

	/*
	 * Create the UDP socket for sending and receiving statistic messages.
	 *
	 * Zero the whole hints struct first: struct addrinfo may have members
	 * beyond the ones named below, and every member that is not explicitly
	 * set has to be zero or a null pointer.
	 */
	MemSet(&hints, 0, sizeof(hints));
	hints.ai_flags = AI_PASSIVE;
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_protocol = 0;
	hints.ai_addrlen = 0;
	hints.ai_addr = NULL;
	hints.ai_canonname = NULL;
	hints.ai_next = NULL;
	ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
	if (ret || !addrs)
	{
		ereport(LOG,
				(errmsg("could not resolve \"localhost\": %s",
						gai_strerror(ret))));
		goto startup_failed;
	}

	/*
	 * On some platforms, pg_getaddrinfo_all() may return multiple addresses
	 * only one of which will actually work (eg, both IPv6 and IPv4 addresses
	 * when kernel will reject IPv6).  Worse, the failure may occur at the
	 * bind() or perhaps even connect() stage.  So we must loop through the
	 * results till we find a working combination. We will generate LOG
	 * messages, but no error, for bogus combinations.
	 */
	for (addr = addrs; addr; addr = addr->ai_next)
	{
#ifdef HAVE_UNIX_SOCKETS
		/* Ignore AF_UNIX sockets, if any are returned. */
		if (addr->ai_family == AF_UNIX)
			continue;
#endif

		if (++tries > 1)
			ereport(LOG,
					(errmsg("trying another address for the statistics collector")));

		if (pgstat_try_address(addr))
			break;
	}

	/* Did we find a working address? */
	if (!addr || pgStatSock == PGINVALID_SOCKET)
		goto startup_failed;

	/*
	 * Set the socket to non-blocking IO.  This ensures that if the collector
	 * falls behind, statistics messages will be discarded; backends won't
	 * block waiting to send messages to the collector.
	 */
	if (!pg_set_noblock(pgStatSock))
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not set statistics collector socket to nonblocking mode: %m")));
		goto startup_failed;
	}

	pgstat_ensure_min_rcvbuf();

	pg_freeaddrinfo_all(hints.ai_family, addrs);

	return;

startup_failed:
	ereport(LOG,
			(errmsg("disabling statistics collector for lack of working socket")));

	if (addrs)
		pg_freeaddrinfo_all(hints.ai_family, addrs);

	if (pgStatSock != PGINVALID_SOCKET)
		closesocket(pgStatSock);
	pgStatSock = PGINVALID_SOCKET;

	/*
	 * Adjust GUC variables to suppress useless activity, and for debugging
	 * purposes (seeing track_counts off is a clue that we failed here). We
	 * use PGC_S_OVERRIDE because there is no point in trying to turn it back
	 * on from postgresql.conf without a restart.
	 */
	SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
}
632 :
633 : /*
634 : * subroutine for pgstat_reset_all
635 : */
636 : static void
637 0 : pgstat_reset_remove_files(const char *directory)
638 : {
639 : DIR *dir;
640 : struct dirent *entry;
641 : char fname[MAXPGPATH * 2];
642 :
643 0 : dir = AllocateDir(directory);
644 0 : while ((entry = ReadDir(dir, directory)) != NULL)
645 : {
646 : int nchars;
647 : Oid tmp_oid;
648 :
649 : /*
650 : * Skip directory entries that don't match the file names we write.
651 : * See get_dbstat_filename for the database-specific pattern.
652 : */
653 0 : if (strncmp(entry->d_name, "global.", 7) == 0)
654 0 : nchars = 7;
655 : else
656 : {
657 0 : nchars = 0;
658 0 : (void) sscanf(entry->d_name, "db_%u.%n",
659 : &tmp_oid, &nchars);
660 0 : if (nchars <= 0)
661 0 : continue;
662 : /* %u allows leading whitespace, so reject that */
663 0 : if (strchr("0123456789", entry->d_name[3]) == NULL)
664 0 : continue;
665 : }
666 :
667 0 : if (strcmp(entry->d_name + nchars, "tmp") != 0 &&
668 0 : strcmp(entry->d_name + nchars, "stat") != 0)
669 0 : continue;
670 :
671 0 : snprintf(fname, sizeof(fname), "%s/%s", directory,
672 0 : entry->d_name);
673 0 : unlink(fname);
674 : }
675 0 : FreeDir(dir);
676 0 : }
677 :
678 : /*
679 : * pgstat_reset_all() -
680 : *
681 : * Remove the stats files. This is currently used only if WAL
682 : * recovery is needed after a crash.
683 : */
684 : void
685 0 : pgstat_reset_all(void)
686 : {
687 0 : pgstat_reset_remove_files(pgstat_stat_directory);
688 0 : pgstat_reset_remove_files(PGSTAT_STAT_PERMANENT_DIRECTORY);
689 0 : }
690 :
691 : #ifdef EXEC_BACKEND
692 :
/*
 * pgstat_forkexec() -
 *
 * Build the argument list for the statistics collector process, then fork
 * and exec it through postmaster_forkexec().  Returns whatever
 * postmaster_forkexec() returns.
 */
static pid_t
pgstat_forkexec(void)
{
	char	   *av[10] = {
		"postgres",
		"--forkcol",
		NULL,					/* filled in by postmaster_forkexec */
		NULL					/* argument list terminator */
	};
	const int	ac = 3;			/* arguments ahead of the terminator */

	Assert(ac < lengthof(av));

	return postmaster_forkexec(ac, av);
}
713 : #endif /* EXEC_BACKEND */
714 :
715 :
716 : /*
717 : * pgstat_start() -
718 : *
719 : * Called from postmaster at startup or after an existing collector
720 : * died. Attempt to fire up a fresh statistics collector.
721 : *
722 : * Returns PID of child process, or 0 if fail.
723 : *
724 : * Note: if fail, we will be called again from the postmaster main loop.
725 : */
726 : int
727 1 : pgstat_start(void)
728 : {
729 : time_t curtime;
730 : pid_t pgStatPid;
731 :
732 : /*
733 : * Check that the socket is there, else pgstat_init failed and we can do
734 : * nothing useful.
735 : */
736 1 : if (pgStatSock == PGINVALID_SOCKET)
737 0 : return 0;
738 :
739 : /*
740 : * Do nothing if too soon since last collector start. This is a safety
741 : * valve to protect against continuous respawn attempts if the collector
742 : * is dying immediately at launch. Note that since we will be re-called
743 : * from the postmaster main loop, we will get another chance later.
744 : */
745 1 : curtime = time(NULL);
746 1 : if ((unsigned int) (curtime - last_pgstat_start_time) <
747 : (unsigned int) PGSTAT_RESTART_INTERVAL)
748 0 : return 0;
749 1 : last_pgstat_start_time = curtime;
750 :
751 : /*
752 : * Okay, fork off the collector.
753 : */
754 : #ifdef EXEC_BACKEND
755 : switch ((pgStatPid = pgstat_forkexec()))
756 : #else
757 1 : switch ((pgStatPid = fork_process()))
758 : #endif
759 : {
760 : case -1:
761 0 : ereport(LOG,
762 : (errmsg("could not fork statistics collector: %m")));
763 0 : return 0;
764 :
765 : #ifndef EXEC_BACKEND
766 : case 0:
767 : /* in postmaster child ... */
768 1 : InitPostmasterChild();
769 :
770 : /* Close the postmaster's sockets */
771 1 : ClosePostmasterPorts(false);
772 :
773 : /* Drop our connection to postmaster's shared memory, as well */
774 1 : dsm_detach_all();
775 1 : PGSharedMemoryDetach();
776 :
777 1 : PgstatCollectorMain(0, NULL);
778 : break;
779 : #endif
780 :
781 : default:
782 1 : return (int) pgStatPid;
783 : }
784 :
785 : /* shouldn't get here */
786 : return 0;
787 : }
788 :
789 : void
790 0 : allow_immediate_pgstat_restart(void)
791 : {
792 0 : last_pgstat_start_time = 0;
793 0 : }
794 :
795 : /* ------------------------------------------------------------
796 : * Public functions used by backends follow
797 : *------------------------------------------------------------
798 : */
799 :
800 :
801 : /* ----------
802 : * pgstat_report_stat() -
803 : *
804 : * Must be called by processes that performs DML: tcop/postgres.c, logical
805 : * receiver processes, SPI worker, etc. to send the so far collected
806 : * per-table and function usage statistics to the collector. Note that this
807 : * is called only when not within a transaction, so it is fair to use
808 : * transaction stop time as an approximation of current time.
809 : * ----------
810 : */
811 : void
812 25284 : pgstat_report_stat(bool force)
813 : {
814 : /* we assume this inits to all zeroes: */
815 : static const PgStat_TableCounts all_zeroes;
816 : static TimestampTz last_report = 0;
817 :
818 : TimestampTz now;
819 : PgStat_MsgTabstat regular_msg;
820 : PgStat_MsgTabstat shared_msg;
821 : TabStatusArray *tsa;
822 : int i;
823 :
824 : /* Don't expend a clock check if nothing to do */
825 25895 : if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
826 626 : pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
827 4 : !have_function_stats)
828 24556 : return;
829 :
830 : /*
831 : * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
832 : * msec since we last sent one, or the caller wants to force stats out.
833 : */
834 25280 : now = GetCurrentTransactionStopTimestamp();
835 50229 : if (!force &&
836 24949 : !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
837 24548 : return;
838 732 : last_report = now;
839 :
840 : /*
841 : * Destroy pgStatTabHash before we start invalidating PgStat_TableEntry
842 : * entries it points to. (Should we fail partway through the loop below,
843 : * it's okay to have removed the hashtable already --- the only
844 : * consequence is we'd get multiple entries for the same table in the
845 : * pgStatTabList, and that's safe.)
846 : */
847 732 : if (pgStatTabHash)
848 726 : hash_destroy(pgStatTabHash);
849 732 : pgStatTabHash = NULL;
850 :
851 : /*
852 : * Scan through the TabStatusArray struct(s) to find tables that actually
853 : * have counts, and build messages to send. We have to separate shared
854 : * relations from regular ones because the databaseid field in the message
855 : * header has to depend on that.
856 : */
857 732 : regular_msg.m_databaseid = MyDatabaseId;
858 732 : shared_msg.m_databaseid = InvalidOid;
859 732 : regular_msg.m_nentries = 0;
860 732 : shared_msg.m_nentries = 0;
861 :
862 1586 : for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
863 : {
864 37300 : for (i = 0; i < tsa->tsa_used; i++)
865 : {
866 36446 : PgStat_TableStatus *entry = &tsa->tsa_entries[i];
867 : PgStat_MsgTabstat *this_msg;
868 : PgStat_TableEntry *this_ent;
869 :
870 : /* Shouldn't have any pending transaction-dependent counts */
871 36446 : Assert(entry->trans == NULL);
872 :
873 : /*
874 : * Ignore entries that didn't accumulate any actual counts, such
875 : * as indexes that were opened by the planner but not used.
876 : */
877 36446 : if (memcmp(&entry->t_counts, &all_zeroes,
878 : sizeof(PgStat_TableCounts)) == 0)
879 6243 : continue;
880 :
881 : /*
882 : * OK, insert data into the appropriate message, and send if full.
883 : */
884 30203 : this_msg = entry->t_shared ? &shared_msg : ®ular_msg;
885 30203 : this_ent = &this_msg->m_entry[this_msg->m_nentries];
886 30203 : this_ent->t_id = entry->t_id;
887 30203 : memcpy(&this_ent->t_counts, &entry->t_counts,
888 : sizeof(PgStat_TableCounts));
889 30203 : if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
890 : {
891 2670 : pgstat_send_tabstat(this_msg);
892 2670 : this_msg->m_nentries = 0;
893 : }
894 : }
895 : /* zero out TableStatus structs after use */
896 854 : MemSet(tsa->tsa_entries, 0,
897 : tsa->tsa_used * sizeof(PgStat_TableStatus));
898 854 : tsa->tsa_used = 0;
899 : }
900 :
901 : /*
902 : * Send partial messages. Make sure that any pending xact commit/abort
903 : * gets counted, even if there are no table stats to send.
904 : */
905 795 : if (regular_msg.m_nentries > 0 ||
906 120 : pgStatXactCommit > 0 || pgStatXactRollback > 0)
907 675 : pgstat_send_tabstat(®ular_msg);
908 732 : if (shared_msg.m_nentries > 0)
909 689 : pgstat_send_tabstat(&shared_msg);
910 :
911 : /* Now, send function statistics */
912 732 : pgstat_send_funcstats();
913 : }
914 :
915 : /*
916 : * Subroutine for pgstat_report_stat: finish and send a tabstat message
917 : */
918 : static void
919 4034 : pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
920 : {
921 : int n;
922 : int len;
923 :
924 : /* It's unlikely we'd get here with no socket, but maybe not impossible */
925 4034 : if (pgStatSock == PGINVALID_SOCKET)
926 4039 : return;
927 :
928 : /*
929 : * Report and reset accumulated xact commit/rollback and I/O timings
930 : * whenever we send a normal tabstat message
931 : */
932 4029 : if (OidIsValid(tsmsg->m_databaseid))
933 : {
934 3240 : tsmsg->m_xact_commit = pgStatXactCommit;
935 3240 : tsmsg->m_xact_rollback = pgStatXactRollback;
936 3240 : tsmsg->m_block_read_time = pgStatBlockReadTime;
937 3240 : tsmsg->m_block_write_time = pgStatBlockWriteTime;
938 3240 : pgStatXactCommit = 0;
939 3240 : pgStatXactRollback = 0;
940 3240 : pgStatBlockReadTime = 0;
941 3240 : pgStatBlockWriteTime = 0;
942 : }
943 : else
944 : {
945 789 : tsmsg->m_xact_commit = 0;
946 789 : tsmsg->m_xact_rollback = 0;
947 789 : tsmsg->m_block_read_time = 0;
948 789 : tsmsg->m_block_write_time = 0;
949 : }
950 :
951 4029 : n = tsmsg->m_nentries;
952 4029 : len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
953 4029 : n * sizeof(PgStat_TableEntry);
954 :
955 4029 : pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
956 4029 : pgstat_send(tsmsg, len);
957 : }
958 :
959 : /*
960 : * Subroutine for pgstat_report_stat: populate and send a function stat message
961 : */
962 : static void
963 732 : pgstat_send_funcstats(void)
964 : {
965 : /* we assume this inits to all zeroes: */
966 : static const PgStat_FunctionCounts all_zeroes;
967 :
968 : PgStat_MsgFuncstat msg;
969 : PgStat_BackendFunctionEntry *entry;
970 : HASH_SEQ_STATUS fstat;
971 :
972 732 : if (pgStatFunctions == NULL)
973 1464 : return;
974 :
975 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_FUNCSTAT);
976 0 : msg.m_databaseid = MyDatabaseId;
977 0 : msg.m_nentries = 0;
978 :
979 0 : hash_seq_init(&fstat, pgStatFunctions);
980 0 : while ((entry = (PgStat_BackendFunctionEntry *) hash_seq_search(&fstat)) != NULL)
981 : {
982 : PgStat_FunctionEntry *m_ent;
983 :
984 : /* Skip it if no counts accumulated since last time */
985 0 : if (memcmp(&entry->f_counts, &all_zeroes,
986 : sizeof(PgStat_FunctionCounts)) == 0)
987 0 : continue;
988 :
989 : /* need to convert format of time accumulators */
990 0 : m_ent = &msg.m_entry[msg.m_nentries];
991 0 : m_ent->f_id = entry->f_id;
992 0 : m_ent->f_numcalls = entry->f_counts.f_numcalls;
993 0 : m_ent->f_total_time = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_total_time);
994 0 : m_ent->f_self_time = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_self_time);
995 :
996 0 : if (++msg.m_nentries >= PGSTAT_NUM_FUNCENTRIES)
997 : {
998 0 : pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
999 0 : msg.m_nentries * sizeof(PgStat_FunctionEntry));
1000 0 : msg.m_nentries = 0;
1001 : }
1002 :
1003 : /* reset the entry's counts */
1004 0 : MemSet(&entry->f_counts, 0, sizeof(PgStat_FunctionCounts));
1005 : }
1006 :
1007 0 : if (msg.m_nentries > 0)
1008 0 : pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
1009 0 : msg.m_nentries * sizeof(PgStat_FunctionEntry));
1010 :
1011 0 : have_function_stats = false;
1012 : }
1013 :
1014 :
1015 : /* ----------
1016 : * pgstat_vacuum_stat() -
1017 : *
1018 : * Will tell the collector about objects he can get rid of.
1019 : * ----------
1020 : */
1021 : void
1022 51 : pgstat_vacuum_stat(void)
1023 : {
1024 : HTAB *htab;
1025 : PgStat_MsgTabpurge msg;
1026 : PgStat_MsgFuncpurge f_msg;
1027 : HASH_SEQ_STATUS hstat;
1028 : PgStat_StatDBEntry *dbentry;
1029 : PgStat_StatTabEntry *tabentry;
1030 : PgStat_StatFuncEntry *funcentry;
1031 : int len;
1032 :
1033 51 : if (pgStatSock == PGINVALID_SOCKET)
1034 8 : return;
1035 :
1036 : /*
1037 : * If not done for this transaction, read the statistics collector stats
1038 : * file into some hash tables.
1039 : */
1040 47 : backend_read_statsfile();
1041 :
1042 : /*
1043 : * Read pg_database and make a list of OIDs of all existing databases
1044 : */
1045 47 : htab = pgstat_collect_oids(DatabaseRelationId);
1046 :
1047 : /*
1048 : * Search the database hash table for dead databases and tell the
1049 : * collector to drop them.
1050 : */
1051 47 : hash_seq_init(&hstat, pgStatDBHash);
1052 235 : while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
1053 : {
1054 141 : Oid dbid = dbentry->databaseid;
1055 :
1056 141 : CHECK_FOR_INTERRUPTS();
1057 :
1058 : /* the DB entry for shared tables (with InvalidOid) is never dropped */
1059 235 : if (OidIsValid(dbid) &&
1060 94 : hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
1061 0 : pgstat_drop_database(dbid);
1062 : }
1063 :
1064 : /* Clean up */
1065 47 : hash_destroy(htab);
1066 :
1067 : /*
1068 : * Lookup our own database entry; if not found, nothing more to do.
1069 : */
1070 47 : dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1071 : (void *) &MyDatabaseId,
1072 : HASH_FIND, NULL);
1073 47 : if (dbentry == NULL || dbentry->tables == NULL)
1074 0 : return;
1075 :
1076 : /*
1077 : * Similarly to above, make a list of all known relations in this DB.
1078 : */
1079 47 : htab = pgstat_collect_oids(RelationRelationId);
1080 :
1081 : /*
1082 : * Initialize our messages table counter to zero
1083 : */
1084 47 : msg.m_nentries = 0;
1085 :
1086 : /*
1087 : * Check for all tables listed in stats hashtable if they still exist.
1088 : */
1089 47 : hash_seq_init(&hstat, dbentry->tables);
1090 19038 : while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
1091 : {
1092 18944 : Oid tabid = tabentry->tableid;
1093 :
1094 18944 : CHECK_FOR_INTERRUPTS();
1095 :
1096 18944 : if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
1097 15463 : continue;
1098 :
1099 : /*
1100 : * Not there, so add this table's Oid to the message
1101 : */
1102 3481 : msg.m_tableid[msg.m_nentries++] = tabid;
1103 :
1104 : /*
1105 : * If the message is full, send it out and reinitialize to empty
1106 : */
1107 3481 : if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
1108 : {
1109 3 : len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1110 3 : + msg.m_nentries * sizeof(Oid);
1111 :
1112 3 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
1113 3 : msg.m_databaseid = MyDatabaseId;
1114 3 : pgstat_send(&msg, len);
1115 :
1116 3 : msg.m_nentries = 0;
1117 : }
1118 : }
1119 :
1120 : /*
1121 : * Send the rest
1122 : */
1123 47 : if (msg.m_nentries > 0)
1124 : {
1125 44 : len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
1126 44 : + msg.m_nentries * sizeof(Oid);
1127 :
1128 44 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
1129 44 : msg.m_databaseid = MyDatabaseId;
1130 44 : pgstat_send(&msg, len);
1131 : }
1132 :
1133 : /* Clean up */
1134 47 : hash_destroy(htab);
1135 :
1136 : /*
1137 : * Now repeat the above steps for functions. However, we needn't bother
1138 : * in the common case where no function stats are being collected.
1139 : */
1140 94 : if (dbentry->functions != NULL &&
1141 47 : hash_get_num_entries(dbentry->functions) > 0)
1142 : {
1143 0 : htab = pgstat_collect_oids(ProcedureRelationId);
1144 :
1145 0 : pgstat_setheader(&f_msg.m_hdr, PGSTAT_MTYPE_FUNCPURGE);
1146 0 : f_msg.m_databaseid = MyDatabaseId;
1147 0 : f_msg.m_nentries = 0;
1148 :
1149 0 : hash_seq_init(&hstat, dbentry->functions);
1150 0 : while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&hstat)) != NULL)
1151 : {
1152 0 : Oid funcid = funcentry->functionid;
1153 :
1154 0 : CHECK_FOR_INTERRUPTS();
1155 :
1156 0 : if (hash_search(htab, (void *) &funcid, HASH_FIND, NULL) != NULL)
1157 0 : continue;
1158 :
1159 : /*
1160 : * Not there, so add this function's Oid to the message
1161 : */
1162 0 : f_msg.m_functionid[f_msg.m_nentries++] = funcid;
1163 :
1164 : /*
1165 : * If the message is full, send it out and reinitialize to empty
1166 : */
1167 0 : if (f_msg.m_nentries >= PGSTAT_NUM_FUNCPURGE)
1168 : {
1169 0 : len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1170 0 : + f_msg.m_nentries * sizeof(Oid);
1171 :
1172 0 : pgstat_send(&f_msg, len);
1173 :
1174 0 : f_msg.m_nentries = 0;
1175 : }
1176 : }
1177 :
1178 : /*
1179 : * Send the rest
1180 : */
1181 0 : if (f_msg.m_nentries > 0)
1182 : {
1183 0 : len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
1184 0 : + f_msg.m_nentries * sizeof(Oid);
1185 :
1186 0 : pgstat_send(&f_msg, len);
1187 : }
1188 :
1189 0 : hash_destroy(htab);
1190 : }
1191 : }
1192 :
1193 :
1194 : /* ----------
1195 : * pgstat_collect_oids() -
1196 : *
1197 : * Collect the OIDs of all objects listed in the specified system catalog
1198 : * into a temporary hash table. Caller should hash_destroy the result
1199 : * when done with it. (However, we make the table in CurrentMemoryContext
1200 : * so that it will be freed properly in event of an error.)
1201 : * ----------
1202 : */
1203 : static HTAB *
1204 94 : pgstat_collect_oids(Oid catalogid)
1205 : {
1206 : HTAB *htab;
1207 : HASHCTL hash_ctl;
1208 : Relation rel;
1209 : HeapScanDesc scan;
1210 : HeapTuple tup;
1211 : Snapshot snapshot;
1212 :
1213 94 : memset(&hash_ctl, 0, sizeof(hash_ctl));
1214 94 : hash_ctl.keysize = sizeof(Oid);
1215 94 : hash_ctl.entrysize = sizeof(Oid);
1216 94 : hash_ctl.hcxt = CurrentMemoryContext;
1217 94 : htab = hash_create("Temporary table of OIDs",
1218 : PGSTAT_TAB_HASH_SIZE,
1219 : &hash_ctl,
1220 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1221 :
1222 94 : rel = heap_open(catalogid, AccessShareLock);
1223 94 : snapshot = RegisterSnapshot(GetLatestSnapshot());
1224 94 : scan = heap_beginscan(rel, snapshot, 0, NULL);
1225 30123 : while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
1226 : {
1227 29935 : Oid thisoid = HeapTupleGetOid(tup);
1228 :
1229 29935 : CHECK_FOR_INTERRUPTS();
1230 :
1231 29935 : (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
1232 : }
1233 94 : heap_endscan(scan);
1234 94 : UnregisterSnapshot(snapshot);
1235 94 : heap_close(rel, AccessShareLock);
1236 :
1237 94 : return htab;
1238 : }
1239 :
1240 :
1241 : /* ----------
1242 : * pgstat_drop_database() -
1243 : *
1244 : * Tell the collector that we just dropped a database.
1245 : * (If the message gets lost, we will still clean the dead DB eventually
1246 : * via future invocations of pgstat_vacuum_stat().)
1247 : * ----------
1248 : */
1249 : void
1250 0 : pgstat_drop_database(Oid databaseid)
1251 : {
1252 : PgStat_MsgDropdb msg;
1253 :
1254 0 : if (pgStatSock == PGINVALID_SOCKET)
1255 0 : return;
1256 :
1257 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
1258 0 : msg.m_databaseid = databaseid;
1259 0 : pgstat_send(&msg, sizeof(msg));
1260 : }
1261 :
1262 :
/* ----------
 * pgstat_drop_relation() -
 *
 *	Send the collector a single-entry TABPURGE message for a relation that
 *	was just dropped in the current database.
 *	(If the message gets lost, we will still clean the dead entry eventually
 *	via future invocations of pgstat_vacuum_stat().)
 *
 *	Currently not used for lack of any good place to call it; we rely
 *	entirely on pgstat_vacuum_stat() to clean out stats for dead rels.
 * ----------
 */
#ifdef NOT_USED
void
pgstat_drop_relation(Oid relid)
{
	PgStat_MsgTabpurge purgemsg;

	if (pgStatSock != PGINVALID_SOCKET)
	{
		/* exactly one table OID goes into the message */
		purgemsg.m_tableid[0] = relid;
		purgemsg.m_nentries = 1;

		pgstat_setheader(&purgemsg.m_hdr, PGSTAT_MTYPE_TABPURGE);
		purgemsg.m_databaseid = MyDatabaseId;
		pgstat_send(&purgemsg,
					offsetof(PgStat_MsgTabpurge, m_tableid[0]) + sizeof(Oid));
	}
}
#endif							/* NOT_USED */
1294 :
1295 :
1296 : /* ----------
1297 : * pgstat_reset_counters() -
1298 : *
1299 : * Tell the statistics collector to reset counters for our database.
1300 : *
1301 : * Permission checking for this function is managed through the normal
1302 : * GRANT system.
1303 : * ----------
1304 : */
1305 : void
1306 0 : pgstat_reset_counters(void)
1307 : {
1308 : PgStat_MsgResetcounter msg;
1309 :
1310 0 : if (pgStatSock == PGINVALID_SOCKET)
1311 0 : return;
1312 :
1313 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER);
1314 0 : msg.m_databaseid = MyDatabaseId;
1315 0 : pgstat_send(&msg, sizeof(msg));
1316 : }
1317 :
1318 : /* ----------
1319 : * pgstat_reset_shared_counters() -
1320 : *
1321 : * Tell the statistics collector to reset cluster-wide shared counters.
1322 : *
1323 : * Permission checking for this function is managed through the normal
1324 : * GRANT system.
1325 : * ----------
1326 : */
1327 : void
1328 0 : pgstat_reset_shared_counters(const char *target)
1329 : {
1330 : PgStat_MsgResetsharedcounter msg;
1331 :
1332 0 : if (pgStatSock == PGINVALID_SOCKET)
1333 0 : return;
1334 :
1335 0 : if (strcmp(target, "archiver") == 0)
1336 0 : msg.m_resettarget = RESET_ARCHIVER;
1337 0 : else if (strcmp(target, "bgwriter") == 0)
1338 0 : msg.m_resettarget = RESET_BGWRITER;
1339 : else
1340 0 : ereport(ERROR,
1341 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1342 : errmsg("unrecognized reset target: \"%s\"", target),
1343 : errhint("Target must be \"archiver\" or \"bgwriter\".")));
1344 :
1345 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
1346 0 : pgstat_send(&msg, sizeof(msg));
1347 : }
1348 :
1349 : /* ----------
1350 : * pgstat_reset_single_counter() -
1351 : *
1352 : * Tell the statistics collector to reset a single counter.
1353 : *
1354 : * Permission checking for this function is managed through the normal
1355 : * GRANT system.
1356 : * ----------
1357 : */
1358 : void
1359 0 : pgstat_reset_single_counter(Oid objoid, PgStat_Single_Reset_Type type)
1360 : {
1361 : PgStat_MsgResetsinglecounter msg;
1362 :
1363 0 : if (pgStatSock == PGINVALID_SOCKET)
1364 0 : return;
1365 :
1366 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSINGLECOUNTER);
1367 0 : msg.m_databaseid = MyDatabaseId;
1368 0 : msg.m_resettype = type;
1369 0 : msg.m_objectid = objoid;
1370 :
1371 0 : pgstat_send(&msg, sizeof(msg));
1372 : }
1373 :
1374 : /* ----------
1375 : * pgstat_report_autovac() -
1376 : *
1377 : * Called from autovacuum.c to report startup of an autovacuum process.
1378 : * We are called before InitPostgres is done, so can't rely on MyDatabaseId;
1379 : * the db OID must be passed in, instead.
1380 : * ----------
1381 : */
1382 : void
1383 3 : pgstat_report_autovac(Oid dboid)
1384 : {
1385 : PgStat_MsgAutovacStart msg;
1386 :
1387 3 : if (pgStatSock == PGINVALID_SOCKET)
1388 3 : return;
1389 :
1390 3 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START);
1391 3 : msg.m_databaseid = dboid;
1392 3 : msg.m_start_time = GetCurrentTimestamp();
1393 :
1394 3 : pgstat_send(&msg, sizeof(msg));
1395 : }
1396 :
1397 :
1398 : /* ---------
1399 : * pgstat_report_vacuum() -
1400 : *
1401 : * Tell the collector about the table we just vacuumed.
1402 : * ---------
1403 : */
1404 : void
1405 390 : pgstat_report_vacuum(Oid tableoid, bool shared,
1406 : PgStat_Counter livetuples, PgStat_Counter deadtuples)
1407 : {
1408 : PgStat_MsgVacuum msg;
1409 :
1410 390 : if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
1411 481 : return;
1412 :
1413 299 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
1414 299 : msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
1415 299 : msg.m_tableoid = tableoid;
1416 299 : msg.m_autovacuum = IsAutoVacuumWorkerProcess();
1417 299 : msg.m_vacuumtime = GetCurrentTimestamp();
1418 299 : msg.m_live_tuples = livetuples;
1419 299 : msg.m_dead_tuples = deadtuples;
1420 299 : pgstat_send(&msg, sizeof(msg));
1421 : }
1422 :
1423 : /* --------
1424 : * pgstat_report_analyze() -
1425 : *
1426 : * Tell the collector about the table we just analyzed.
1427 : *
1428 : * Caller must provide new live- and dead-tuples estimates, as well as a
1429 : * flag indicating whether to reset the changes_since_analyze counter.
1430 : * --------
1431 : */
1432 : void
1433 206 : pgstat_report_analyze(Relation rel,
1434 : PgStat_Counter livetuples, PgStat_Counter deadtuples,
1435 : bool resetcounter)
1436 : {
1437 : PgStat_MsgAnalyze msg;
1438 :
1439 206 : if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
1440 274 : return;
1441 :
1442 : /*
1443 : * Unlike VACUUM, ANALYZE might be running inside a transaction that has
1444 : * already inserted and/or deleted rows in the target table. ANALYZE will
1445 : * have counted such rows as live or dead respectively. Because we will
1446 : * report our counts of such rows at transaction end, we should subtract
1447 : * off these counts from what we send to the collector now, else they'll
1448 : * be double-counted after commit. (This approach also ensures that the
1449 : * collector ends up with the right numbers if we abort instead of
1450 : * committing.)
1451 : */
1452 138 : if (rel->pgstat_info != NULL)
1453 : {
1454 : PgStat_TableXactStatus *trans;
1455 :
1456 149 : for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
1457 : {
1458 11 : livetuples -= trans->tuples_inserted - trans->tuples_deleted;
1459 11 : deadtuples -= trans->tuples_updated + trans->tuples_deleted;
1460 : }
1461 : /* count stuff inserted by already-aborted subxacts, too */
1462 138 : deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
1463 : /* Since ANALYZE's counts are estimates, we could have underflowed */
1464 138 : livetuples = Max(livetuples, 0);
1465 138 : deadtuples = Max(deadtuples, 0);
1466 : }
1467 :
1468 138 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
1469 138 : msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
1470 138 : msg.m_tableoid = RelationGetRelid(rel);
1471 138 : msg.m_autovacuum = IsAutoVacuumWorkerProcess();
1472 138 : msg.m_resetcounter = resetcounter;
1473 138 : msg.m_analyzetime = GetCurrentTimestamp();
1474 138 : msg.m_live_tuples = livetuples;
1475 138 : msg.m_dead_tuples = deadtuples;
1476 138 : pgstat_send(&msg, sizeof(msg));
1477 : }
1478 :
1479 : /* --------
1480 : * pgstat_report_recovery_conflict() -
1481 : *
1482 : * Tell the collector about a Hot Standby recovery conflict.
1483 : * --------
1484 : */
1485 : void
1486 0 : pgstat_report_recovery_conflict(int reason)
1487 : {
1488 : PgStat_MsgRecoveryConflict msg;
1489 :
1490 0 : if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
1491 0 : return;
1492 :
1493 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT);
1494 0 : msg.m_databaseid = MyDatabaseId;
1495 0 : msg.m_reason = reason;
1496 0 : pgstat_send(&msg, sizeof(msg));
1497 : }
1498 :
1499 : /* --------
1500 : * pgstat_report_deadlock() -
1501 : *
1502 : * Tell the collector about a deadlock detected.
1503 : * --------
1504 : */
1505 : void
1506 0 : pgstat_report_deadlock(void)
1507 : {
1508 : PgStat_MsgDeadlock msg;
1509 :
1510 0 : if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
1511 0 : return;
1512 :
1513 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DEADLOCK);
1514 0 : msg.m_databaseid = MyDatabaseId;
1515 0 : pgstat_send(&msg, sizeof(msg));
1516 : }
1517 :
1518 : /* --------
1519 : * pgstat_report_tempfile() -
1520 : *
1521 : * Tell the collector about a temporary file.
1522 : * --------
1523 : */
1524 : void
1525 24 : pgstat_report_tempfile(size_t filesize)
1526 : {
1527 : PgStat_MsgTempFile msg;
1528 :
1529 24 : if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
1530 24 : return;
1531 :
1532 24 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TEMPFILE);
1533 24 : msg.m_databaseid = MyDatabaseId;
1534 24 : msg.m_filesize = filesize;
1535 24 : pgstat_send(&msg, sizeof(msg));
1536 : }
1537 :
1538 :
1539 : /* ----------
1540 : * pgstat_ping() -
1541 : *
1542 : * Send some junk data to the collector to increase traffic.
1543 : * ----------
1544 : */
1545 : void
1546 0 : pgstat_ping(void)
1547 : {
1548 : PgStat_MsgDummy msg;
1549 :
1550 0 : if (pgStatSock == PGINVALID_SOCKET)
1551 0 : return;
1552 :
1553 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY);
1554 0 : pgstat_send(&msg, sizeof(msg));
1555 : }
1556 :
1557 : /* ----------
1558 : * pgstat_send_inquiry() -
1559 : *
1560 : * Notify collector that we need fresh data.
1561 : * ----------
1562 : */
1563 : static void
1564 83 : pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid)
1565 : {
1566 : PgStat_MsgInquiry msg;
1567 :
1568 83 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
1569 83 : msg.clock_time = clock_time;
1570 83 : msg.cutoff_time = cutoff_time;
1571 83 : msg.databaseid = databaseid;
1572 83 : pgstat_send(&msg, sizeof(msg));
1573 83 : }
1574 :
1575 :
1576 : /*
1577 : * Initialize function call usage data.
1578 : * Called by the executor before invoking a function.
1579 : */
1580 : void
1581 685422 : pgstat_init_function_usage(FunctionCallInfoData *fcinfo,
1582 : PgStat_FunctionCallUsage *fcu)
1583 : {
1584 : PgStat_BackendFunctionEntry *htabent;
1585 : bool found;
1586 :
1587 685422 : if (pgstat_track_functions <= fcinfo->flinfo->fn_stats)
1588 : {
1589 : /* stats not wanted */
1590 685422 : fcu->fs = NULL;
1591 1370844 : return;
1592 : }
1593 :
1594 0 : if (!pgStatFunctions)
1595 : {
1596 : /* First time through - initialize function stat table */
1597 : HASHCTL hash_ctl;
1598 :
1599 0 : memset(&hash_ctl, 0, sizeof(hash_ctl));
1600 0 : hash_ctl.keysize = sizeof(Oid);
1601 0 : hash_ctl.entrysize = sizeof(PgStat_BackendFunctionEntry);
1602 0 : pgStatFunctions = hash_create("Function stat entries",
1603 : PGSTAT_FUNCTION_HASH_SIZE,
1604 : &hash_ctl,
1605 : HASH_ELEM | HASH_BLOBS);
1606 : }
1607 :
1608 : /* Get the stats entry for this function, create if necessary */
1609 0 : htabent = hash_search(pgStatFunctions, &fcinfo->flinfo->fn_oid,
1610 : HASH_ENTER, &found);
1611 0 : if (!found)
1612 0 : MemSet(&htabent->f_counts, 0, sizeof(PgStat_FunctionCounts));
1613 :
1614 0 : fcu->fs = &htabent->f_counts;
1615 :
1616 : /* save stats for this function, later used to compensate for recursion */
1617 0 : fcu->save_f_total_time = htabent->f_counts.f_total_time;
1618 :
1619 : /* save current backend-wide total time */
1620 0 : fcu->save_total = total_func_time;
1621 :
1622 : /* get clock time as of function start */
1623 0 : INSTR_TIME_SET_CURRENT(fcu->f_start);
1624 : }
1625 :
1626 : /*
1627 : * find_funcstat_entry - find any existing PgStat_BackendFunctionEntry entry
1628 : * for specified function
1629 : *
1630 : * If no entry, return NULL, don't create a new one
1631 : */
1632 : PgStat_BackendFunctionEntry *
1633 0 : find_funcstat_entry(Oid func_id)
1634 : {
1635 0 : if (pgStatFunctions == NULL)
1636 0 : return NULL;
1637 :
1638 0 : return (PgStat_BackendFunctionEntry *) hash_search(pgStatFunctions,
1639 : (void *) &func_id,
1640 : HASH_FIND, NULL);
1641 : }
1642 :
1643 : /*
1644 : * Calculate function call usage and update stat counters.
1645 : * Called by the executor after invoking a function.
1646 : *
1647 : * In the case of a set-returning function that runs in value-per-call mode,
1648 : * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage
1649 : * calls for what the user considers a single call of the function. The
1650 : * finalize flag should be TRUE on the last call.
1651 : */
1652 : void
1653 685233 : pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize)
1654 : {
1655 685233 : PgStat_FunctionCounts *fs = fcu->fs;
1656 : instr_time f_total;
1657 : instr_time f_others;
1658 : instr_time f_self;
1659 :
1660 : /* stats not wanted? */
1661 685233 : if (fs == NULL)
1662 1370466 : return;
1663 :
1664 : /* total elapsed time in this function call */
1665 0 : INSTR_TIME_SET_CURRENT(f_total);
1666 0 : INSTR_TIME_SUBTRACT(f_total, fcu->f_start);
1667 :
1668 : /* self usage: elapsed minus anything already charged to other calls */
1669 0 : f_others = total_func_time;
1670 0 : INSTR_TIME_SUBTRACT(f_others, fcu->save_total);
1671 0 : f_self = f_total;
1672 0 : INSTR_TIME_SUBTRACT(f_self, f_others);
1673 :
1674 : /* update backend-wide total time */
1675 0 : INSTR_TIME_ADD(total_func_time, f_self);
1676 :
1677 : /*
1678 : * Compute the new f_total_time as the total elapsed time added to the
1679 : * pre-call value of f_total_time. This is necessary to avoid
1680 : * double-counting any time taken by recursive calls of myself. (We do
1681 : * not need any similar kluge for self time, since that already excludes
1682 : * any recursive calls.)
1683 : */
1684 0 : INSTR_TIME_ADD(f_total, fcu->save_f_total_time);
1685 :
1686 : /* update counters in function stats table */
1687 0 : if (finalize)
1688 0 : fs->f_numcalls++;
1689 0 : fs->f_total_time = f_total;
1690 0 : INSTR_TIME_ADD(fs->f_self_time, f_self);
1691 :
1692 : /* indicate that we have something to send */
1693 0 : have_function_stats = true;
1694 : }
1695 :
1696 :
1697 : /* ----------
1698 : * pgstat_initstats() -
1699 : *
1700 : * Initialize a relcache entry to count access statistics.
1701 : * Called whenever a relation is opened.
1702 : *
1703 : * We assume that a relcache entry's pgstat_info field is zeroed by
1704 : * relcache.c when the relcache entry is made; thereafter it is long-lived
1705 : * data. We can avoid repeated searches of the TabStatus arrays when the
1706 : * same relation is touched repeatedly within a transaction.
1707 : * ----------
1708 : */
1709 : void
1710 1007309 : pgstat_initstats(Relation rel)
1711 : {
1712 1007309 : Oid rel_id = rel->rd_id;
1713 1007309 : char relkind = rel->rd_rel->relkind;
1714 :
1715 : /* We only count stats for things that have storage */
1716 1019940 : if (!(relkind == RELKIND_RELATION ||
1717 504514 : relkind == RELKIND_MATVIEW ||
1718 15171 : relkind == RELKIND_INDEX ||
1719 : relkind == RELKIND_TOASTVALUE ||
1720 : relkind == RELKIND_SEQUENCE))
1721 : {
1722 11514 : rel->pgstat_info = NULL;
1723 11514 : return;
1724 : }
1725 :
1726 995795 : if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
1727 : {
1728 : /* We're not counting at all */
1729 43136 : rel->pgstat_info = NULL;
1730 43136 : return;
1731 : }
1732 :
1733 : /*
1734 : * If we already set up this relation in the current transaction, nothing
1735 : * to do.
1736 : */
1737 1873833 : if (rel->pgstat_info != NULL &&
1738 921174 : rel->pgstat_info->t_id == rel_id)
1739 910204 : return;
1740 :
1741 : /* Else find or make the PgStat_TableStatus entry, and update link */
1742 42455 : rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
1743 : }
1744 :
1745 : /*
1746 : * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
1747 : */
1748 : static PgStat_TableStatus *
1749 42469 : get_tabstat_entry(Oid rel_id, bool isshared)
1750 : {
1751 : TabStatHashEntry *hash_entry;
1752 : PgStat_TableStatus *entry;
1753 : TabStatusArray *tsa;
1754 : bool found;
1755 :
1756 : /*
1757 : * Create hash table if we don't have it already.
1758 : */
1759 42469 : if (pgStatTabHash == NULL)
1760 : {
1761 : HASHCTL ctl;
1762 :
1763 728 : memset(&ctl, 0, sizeof(ctl));
1764 728 : ctl.keysize = sizeof(Oid);
1765 728 : ctl.entrysize = sizeof(TabStatHashEntry);
1766 :
1767 728 : pgStatTabHash = hash_create("pgstat TabStatusArray lookup hash table",
1768 : TABSTAT_QUANTUM,
1769 : &ctl,
1770 : HASH_ELEM | HASH_BLOBS);
1771 : }
1772 :
1773 : /*
1774 : * Find an entry or create a new one.
1775 : */
1776 42469 : hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found);
1777 42469 : if (!found)
1778 : {
1779 : /* initialize new entry with null pointer */
1780 36448 : hash_entry->tsa_entry = NULL;
1781 : }
1782 :
1783 : /*
1784 : * If entry is already valid, we're done.
1785 : */
1786 42469 : if (hash_entry->tsa_entry)
1787 6021 : return hash_entry->tsa_entry;
1788 :
1789 : /*
1790 : * Locate the first pgStatTabList entry with free space, making a new list
1791 : * entry if needed. Note that we could get an OOM failure here, but if so
1792 : * we have left the hashtable and the list in a consistent state.
1793 : */
1794 36448 : if (pgStatTabList == NULL)
1795 : {
1796 : /* Set up first pgStatTabList entry */
1797 336 : pgStatTabList = (TabStatusArray *)
1798 336 : MemoryContextAllocZero(TopMemoryContext,
1799 : sizeof(TabStatusArray));
1800 : }
1801 :
1802 36448 : tsa = pgStatTabList;
1803 76050 : while (tsa->tsa_used >= TABSTAT_QUANTUM)
1804 : {
1805 3154 : if (tsa->tsa_next == NULL)
1806 57 : tsa->tsa_next = (TabStatusArray *)
1807 57 : MemoryContextAllocZero(TopMemoryContext,
1808 : sizeof(TabStatusArray));
1809 3154 : tsa = tsa->tsa_next;
1810 : }
1811 :
1812 : /*
1813 : * Allocate a PgStat_TableStatus entry within this list entry. We assume
1814 : * the entry was already zeroed, either at creation or after last use.
1815 : */
1816 36448 : entry = &tsa->tsa_entries[tsa->tsa_used++];
1817 36448 : entry->t_id = rel_id;
1818 36448 : entry->t_shared = isshared;
1819 :
1820 : /*
1821 : * Now we can fill the entry in pgStatTabHash.
1822 : */
1823 36448 : hash_entry->tsa_entry = entry;
1824 :
1825 36448 : return entry;
1826 : }
1827 :
1828 : /*
1829 : * find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
1830 : *
1831 : * If no entry, return NULL, don't create a new one
1832 : *
1833 : * Note: if we got an error in the most recent execution of pgstat_report_stat,
1834 : * it's possible that an entry exists but there's no hashtable entry for it.
1835 : * That's okay, we'll treat this case as "doesn't exist".
1836 : */
1837 : PgStat_TableStatus *
1838 0 : find_tabstat_entry(Oid rel_id)
1839 : {
1840 : TabStatHashEntry *hash_entry;
1841 :
1842 : /* If hashtable doesn't exist, there are no entries at all */
1843 0 : if (!pgStatTabHash)
1844 0 : return NULL;
1845 :
1846 0 : hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL);
1847 0 : if (!hash_entry)
1848 0 : return NULL;
1849 :
1850 : /* Note that this step could also return NULL, but that's correct */
1851 0 : return hash_entry->tsa_entry;
1852 : }
1853 :
1854 : /*
1855 : * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed
1856 : */
1857 : static PgStat_SubXactStatus *
1858 26973 : get_tabstat_stack_level(int nest_level)
1859 : {
1860 : PgStat_SubXactStatus *xact_state;
1861 :
1862 26973 : xact_state = pgStatXactStack;
1863 26973 : if (xact_state == NULL || xact_state->nest_level != nest_level)
1864 : {
1865 9761 : xact_state = (PgStat_SubXactStatus *)
1866 9761 : MemoryContextAlloc(TopTransactionContext,
1867 : sizeof(PgStat_SubXactStatus));
1868 9761 : xact_state->nest_level = nest_level;
1869 9761 : xact_state->prev = pgStatXactStack;
1870 9761 : xact_state->first = NULL;
1871 9761 : pgStatXactStack = xact_state;
1872 : }
1873 26973 : return xact_state;
1874 : }
1875 :
1876 : /*
1877 : * add_tabstat_xact_level - add a new (sub)transaction state record
1878 : */
1879 : static void
1880 26956 : add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
1881 : {
1882 : PgStat_SubXactStatus *xact_state;
1883 : PgStat_TableXactStatus *trans;
1884 :
1885 : /*
1886 : * If this is the first rel to be modified at the current nest level, we
1887 : * first have to push a transaction stack entry.
1888 : */
1889 26956 : xact_state = get_tabstat_stack_level(nest_level);
1890 :
1891 : /* Now make a per-table stack entry */
1892 26956 : trans = (PgStat_TableXactStatus *)
1893 26956 : MemoryContextAllocZero(TopTransactionContext,
1894 : sizeof(PgStat_TableXactStatus));
1895 26956 : trans->nest_level = nest_level;
1896 26956 : trans->upper = pgstat_info->trans;
1897 26956 : trans->parent = pgstat_info;
1898 26956 : trans->next = xact_state->first;
1899 26956 : xact_state->first = trans;
1900 26956 : pgstat_info->trans = trans;
1901 26956 : }
1902 :
1903 : /*
1904 : * pgstat_count_heap_insert - count a tuple insertion of n tuples
1905 : */
1906 : void
1907 620561 : pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
1908 : {
1909 620561 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1910 :
1911 620561 : if (pgstat_info != NULL)
1912 : {
1913 : /* We have to log the effect at the proper transactional level */
1914 598739 : int nest_level = GetCurrentTransactionNestLevel();
1915 :
1916 1179662 : if (pgstat_info->trans == NULL ||
1917 580923 : pgstat_info->trans->nest_level != nest_level)
1918 17844 : add_tabstat_xact_level(pgstat_info, nest_level);
1919 :
1920 598739 : pgstat_info->trans->tuples_inserted += n;
1921 : }
1922 620561 : }
1923 :
1924 : /*
1925 : * pgstat_count_heap_update - count a tuple update
1926 : */
1927 : void
1928 9388 : pgstat_count_heap_update(Relation rel, bool hot)
1929 : {
1930 9388 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1931 :
1932 9388 : if (pgstat_info != NULL)
1933 : {
1934 : /* We have to log the effect at the proper transactional level */
1935 8931 : int nest_level = GetCurrentTransactionNestLevel();
1936 :
1937 15431 : if (pgstat_info->trans == NULL ||
1938 6500 : pgstat_info->trans->nest_level != nest_level)
1939 2443 : add_tabstat_xact_level(pgstat_info, nest_level);
1940 :
1941 8931 : pgstat_info->trans->tuples_updated++;
1942 :
1943 : /* t_tuples_hot_updated is nontransactional, so just advance it */
1944 8931 : if (hot)
1945 4899 : pgstat_info->t_counts.t_tuples_hot_updated++;
1946 : }
1947 9388 : }
1948 :
1949 : /*
1950 : * pgstat_count_heap_delete - count a tuple deletion
1951 : */
1952 : void
1953 109180 : pgstat_count_heap_delete(Relation rel)
1954 : {
1955 109180 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
1956 :
1957 109180 : if (pgstat_info != NULL)
1958 : {
1959 : /* We have to log the effect at the proper transactional level */
1960 109100 : int nest_level = GetCurrentTransactionNestLevel();
1961 :
1962 211642 : if (pgstat_info->trans == NULL ||
1963 102542 : pgstat_info->trans->nest_level != nest_level)
1964 6578 : add_tabstat_xact_level(pgstat_info, nest_level);
1965 :
1966 109100 : pgstat_info->trans->tuples_deleted++;
1967 : }
1968 109180 : }
1969 :
1970 : /*
1971 : * pgstat_truncate_save_counters
1972 : *
1973 : * Whenever a table is truncated, we save its i/u/d counters so that they can
1974 : * be cleared, and if the (sub)xact that executed the truncate later aborts,
1975 : * the counters can be restored to the saved (pre-truncate) values. Note we do
1976 : * this on the first truncate in any particular subxact level only.
1977 : */
1978 : static void
1979 98 : pgstat_truncate_save_counters(PgStat_TableXactStatus *trans)
1980 : {
1981 98 : if (!trans->truncated)
1982 : {
1983 96 : trans->inserted_pre_trunc = trans->tuples_inserted;
1984 96 : trans->updated_pre_trunc = trans->tuples_updated;
1985 96 : trans->deleted_pre_trunc = trans->tuples_deleted;
1986 96 : trans->truncated = true;
1987 : }
1988 98 : }
1989 :
1990 : /*
1991 : * pgstat_truncate_restore_counters - restore counters when a truncate aborts
1992 : */
1993 : static void
1994 1241 : pgstat_truncate_restore_counters(PgStat_TableXactStatus *trans)
1995 : {
1996 1241 : if (trans->truncated)
1997 : {
1998 16 : trans->tuples_inserted = trans->inserted_pre_trunc;
1999 16 : trans->tuples_updated = trans->updated_pre_trunc;
2000 16 : trans->tuples_deleted = trans->deleted_pre_trunc;
2001 : }
2002 1241 : }
2003 :
2004 : /*
2005 : * pgstat_count_truncate - update tuple counters due to truncate
2006 : */
2007 : void
2008 95 : pgstat_count_truncate(Relation rel)
2009 : {
2010 95 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
2011 :
2012 95 : if (pgstat_info != NULL)
2013 : {
2014 : /* We have to log the effect at the proper transactional level */
2015 95 : int nest_level = GetCurrentTransactionNestLevel();
2016 :
2017 101 : if (pgstat_info->trans == NULL ||
2018 6 : pgstat_info->trans->nest_level != nest_level)
2019 91 : add_tabstat_xact_level(pgstat_info, nest_level);
2020 :
2021 95 : pgstat_truncate_save_counters(pgstat_info->trans);
2022 95 : pgstat_info->trans->tuples_inserted = 0;
2023 95 : pgstat_info->trans->tuples_updated = 0;
2024 95 : pgstat_info->trans->tuples_deleted = 0;
2025 : }
2026 95 : }
2027 :
2028 : /*
2029 : * pgstat_update_heap_dead_tuples - update dead-tuples count
2030 : *
2031 : * The semantics of this are that we are reporting the nontransactional
2032 : * recovery of "delta" dead tuples; so t_delta_dead_tuples decreases
2033 : * rather than increasing, and the change goes straight into the per-table
2034 : * counter, not into transactional state.
2035 : */
2036 : void
2037 706 : pgstat_update_heap_dead_tuples(Relation rel, int delta)
2038 : {
2039 706 : PgStat_TableStatus *pgstat_info = rel->pgstat_info;
2040 :
2041 706 : if (pgstat_info != NULL)
2042 632 : pgstat_info->t_counts.t_delta_dead_tuples -= delta;
2043 706 : }
2044 :
2045 :
2046 : /* ----------
2047 : * AtEOXact_PgStat
2048 : *
2049 : * Called from access/transam/xact.c at top-level transaction commit/abort.
2050 : * ----------
2051 : */
2052 : void
2053 26218 : AtEOXact_PgStat(bool isCommit)
2054 : {
2055 : PgStat_SubXactStatus *xact_state;
2056 :
2057 : /*
2058 : * Count transaction commit or abort. (We use counters, not just bools,
2059 : * in case the reporting message isn't sent right away.)
2060 : */
2061 26218 : if (isCommit)
2062 22909 : pgStatXactCommit++;
2063 : else
2064 3309 : pgStatXactRollback++;
2065 :
2066 : /*
2067 : * Transfer transactional insert/update counts into the base tabstat
2068 : * entries. We don't bother to free any of the transactional state, since
2069 : * it's all in TopTransactionContext and will go away anyway.
2070 : */
2071 26218 : xact_state = pgStatXactStack;
2072 26218 : if (xact_state != NULL)
2073 : {
2074 : PgStat_TableXactStatus *trans;
2075 :
2076 9694 : Assert(xact_state->nest_level == 1);
2077 9694 : Assert(xact_state->prev == NULL);
2078 36544 : for (trans = xact_state->first; trans != NULL; trans = trans->next)
2079 : {
2080 : PgStat_TableStatus *tabstat;
2081 :
2082 26850 : Assert(trans->nest_level == 1);
2083 26850 : Assert(trans->upper == NULL);
2084 26850 : tabstat = trans->parent;
2085 26850 : Assert(tabstat->trans == trans);
2086 : /* restore pre-truncate stats (if any) in case of aborted xact */
2087 26850 : if (!isCommit)
2088 1169 : pgstat_truncate_restore_counters(trans);
2089 : /* count attempted actions regardless of commit/abort */
2090 26850 : tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
2091 26850 : tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
2092 26850 : tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
2093 26850 : if (isCommit)
2094 : {
2095 25681 : tabstat->t_counts.t_truncated = trans->truncated;
2096 25681 : if (trans->truncated)
2097 : {
2098 : /* forget live/dead stats seen by backend thus far */
2099 77 : tabstat->t_counts.t_delta_live_tuples = 0;
2100 77 : tabstat->t_counts.t_delta_dead_tuples = 0;
2101 : }
2102 : /* insert adds a live tuple, delete removes one */
2103 51362 : tabstat->t_counts.t_delta_live_tuples +=
2104 25681 : trans->tuples_inserted - trans->tuples_deleted;
2105 : /* update and delete each create a dead tuple */
2106 51362 : tabstat->t_counts.t_delta_dead_tuples +=
2107 25681 : trans->tuples_updated + trans->tuples_deleted;
2108 : /* insert, update, delete each count as one change event */
2109 51362 : tabstat->t_counts.t_changed_tuples +=
2110 51362 : trans->tuples_inserted + trans->tuples_updated +
2111 25681 : trans->tuples_deleted;
2112 : }
2113 : else
2114 : {
2115 : /* inserted tuples are dead, deleted tuples are unaffected */
2116 2338 : tabstat->t_counts.t_delta_dead_tuples +=
2117 1169 : trans->tuples_inserted + trans->tuples_updated;
2118 : /* an aborted xact generates no changed_tuple events */
2119 : }
2120 26850 : tabstat->trans = NULL;
2121 : }
2122 : }
2123 26218 : pgStatXactStack = NULL;
2124 :
2125 : /* Make sure any stats snapshot is thrown away */
2126 26218 : pgstat_clear_snapshot();
2127 26218 : }
2128 :
2129 : /* ----------
2130 : * AtEOSubXact_PgStat
2131 : *
2132 : * Called from access/transam/xact.c at subtransaction commit/abort.
2133 : * ----------
2134 : */
2135 : void
2136 372 : AtEOSubXact_PgStat(bool isCommit, int nestDepth)
2137 : {
2138 : PgStat_SubXactStatus *xact_state;
2139 :
2140 : /*
2141 : * Transfer transactional insert/update counts into the next higher
2142 : * subtransaction state.
2143 : */
2144 372 : xact_state = pgStatXactStack;
2145 475 : if (xact_state != NULL &&
2146 103 : xact_state->nest_level >= nestDepth)
2147 : {
2148 : PgStat_TableXactStatus *trans;
2149 : PgStat_TableXactStatus *next_trans;
2150 :
2151 : /* delink xact_state from stack immediately to simplify reuse case */
2152 61 : pgStatXactStack = xact_state->prev;
2153 :
2154 170 : for (trans = xact_state->first; trans != NULL; trans = next_trans)
2155 : {
2156 : PgStat_TableStatus *tabstat;
2157 :
2158 109 : next_trans = trans->next;
2159 109 : Assert(trans->nest_level == nestDepth);
2160 109 : tabstat = trans->parent;
2161 109 : Assert(tabstat->trans == trans);
2162 109 : if (isCommit)
2163 : {
2164 37 : if (trans->upper && trans->upper->nest_level == nestDepth - 1)
2165 : {
2166 20 : if (trans->truncated)
2167 : {
2168 : /* propagate the truncate status one level up */
2169 3 : pgstat_truncate_save_counters(trans->upper);
2170 : /* replace upper xact stats with ours */
2171 3 : trans->upper->tuples_inserted = trans->tuples_inserted;
2172 3 : trans->upper->tuples_updated = trans->tuples_updated;
2173 3 : trans->upper->tuples_deleted = trans->tuples_deleted;
2174 : }
2175 : else
2176 : {
2177 17 : trans->upper->tuples_inserted += trans->tuples_inserted;
2178 17 : trans->upper->tuples_updated += trans->tuples_updated;
2179 17 : trans->upper->tuples_deleted += trans->tuples_deleted;
2180 : }
2181 20 : tabstat->trans = trans->upper;
2182 20 : pfree(trans);
2183 : }
2184 : else
2185 : {
2186 : /*
2187 : * When there isn't an immediate parent state, we can just
2188 : * reuse the record instead of going through a
2189 : * palloc/pfree pushup (this works since it's all in
2190 : * TopTransactionContext anyway). We have to re-link it
2191 : * into the parent level, though, and that might mean
2192 : * pushing a new entry into the pgStatXactStack.
2193 : */
2194 : PgStat_SubXactStatus *upper_xact_state;
2195 :
2196 17 : upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
2197 17 : trans->next = upper_xact_state->first;
2198 17 : upper_xact_state->first = trans;
2199 17 : trans->nest_level = nestDepth - 1;
2200 : }
2201 : }
2202 : else
2203 : {
2204 : /*
2205 : * On abort, update top-level tabstat counts, then forget the
2206 : * subtransaction
2207 : */
2208 :
2209 : /* first restore values obliterated by truncate */
2210 72 : pgstat_truncate_restore_counters(trans);
2211 : /* count attempted actions regardless of commit/abort */
2212 72 : tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
2213 72 : tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
2214 72 : tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
2215 : /* inserted tuples are dead, deleted tuples are unaffected */
2216 144 : tabstat->t_counts.t_delta_dead_tuples +=
2217 72 : trans->tuples_inserted + trans->tuples_updated;
2218 72 : tabstat->trans = trans->upper;
2219 72 : pfree(trans);
2220 : }
2221 : }
2222 61 : pfree(xact_state);
2223 : }
2224 372 : }
2225 :
2226 :
2227 : /*
2228 : * AtPrepare_PgStat
2229 : * Save the transactional stats state at 2PC transaction prepare.
2230 : *
2231 : * In this phase we just generate 2PC records for all the pending
2232 : * transaction-dependent stats work.
2233 : */
2234 : void
2235 6 : AtPrepare_PgStat(void)
2236 : {
2237 : PgStat_SubXactStatus *xact_state;
2238 :
2239 6 : xact_state = pgStatXactStack;
2240 6 : if (xact_state != NULL)
2241 : {
2242 : PgStat_TableXactStatus *trans;
2243 :
2244 6 : Assert(xact_state->nest_level == 1);
2245 6 : Assert(xact_state->prev == NULL);
2246 20 : for (trans = xact_state->first; trans != NULL; trans = trans->next)
2247 : {
2248 : PgStat_TableStatus *tabstat;
2249 : TwoPhasePgStatRecord record;
2250 :
2251 14 : Assert(trans->nest_level == 1);
2252 14 : Assert(trans->upper == NULL);
2253 14 : tabstat = trans->parent;
2254 14 : Assert(tabstat->trans == trans);
2255 :
2256 14 : record.tuples_inserted = trans->tuples_inserted;
2257 14 : record.tuples_updated = trans->tuples_updated;
2258 14 : record.tuples_deleted = trans->tuples_deleted;
2259 14 : record.inserted_pre_trunc = trans->inserted_pre_trunc;
2260 14 : record.updated_pre_trunc = trans->updated_pre_trunc;
2261 14 : record.deleted_pre_trunc = trans->deleted_pre_trunc;
2262 14 : record.t_id = tabstat->t_id;
2263 14 : record.t_shared = tabstat->t_shared;
2264 14 : record.t_truncated = trans->truncated;
2265 :
2266 14 : RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
2267 : &record, sizeof(TwoPhasePgStatRecord));
2268 : }
2269 : }
2270 6 : }
2271 :
2272 : /*
2273 : * PostPrepare_PgStat
2274 : * Clean up after successful PREPARE.
2275 : *
2276 : * All we need do here is unlink the transaction stats state from the
2277 : * nontransactional state. The nontransactional action counts will be
2278 : * reported to the stats collector immediately, while the effects on live
2279 : * and dead tuple counts are preserved in the 2PC state file.
2280 : *
2281 : * Note: AtEOXact_PgStat is not called during PREPARE.
2282 : */
2283 : void
2284 6 : PostPrepare_PgStat(void)
2285 : {
2286 : PgStat_SubXactStatus *xact_state;
2287 :
2288 : /*
2289 : * We don't bother to free any of the transactional state, since it's all
2290 : * in TopTransactionContext and will go away anyway.
2291 : */
2292 6 : xact_state = pgStatXactStack;
2293 6 : if (xact_state != NULL)
2294 : {
2295 : PgStat_TableXactStatus *trans;
2296 :
2297 20 : for (trans = xact_state->first; trans != NULL; trans = trans->next)
2298 : {
2299 : PgStat_TableStatus *tabstat;
2300 :
2301 14 : tabstat = trans->parent;
2302 14 : tabstat->trans = NULL;
2303 : }
2304 : }
2305 6 : pgStatXactStack = NULL;
2306 :
2307 : /* Make sure any stats snapshot is thrown away */
2308 6 : pgstat_clear_snapshot();
2309 6 : }
2310 :
2311 : /*
2312 : * 2PC processing routine for COMMIT PREPARED case.
2313 : *
2314 : * Load the saved counts into our local pgstats state.
2315 : */
2316 : void
2317 11 : pgstat_twophase_postcommit(TransactionId xid, uint16 info,
2318 : void *recdata, uint32 len)
2319 : {
2320 11 : TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2321 : PgStat_TableStatus *pgstat_info;
2322 :
2323 : /* Find or create a tabstat entry for the rel */
2324 11 : pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2325 :
2326 : /* Same math as in AtEOXact_PgStat, commit case */
2327 11 : pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2328 11 : pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2329 11 : pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2330 11 : pgstat_info->t_counts.t_truncated = rec->t_truncated;
2331 11 : if (rec->t_truncated)
2332 : {
2333 : /* forget live/dead stats seen by backend thus far */
2334 0 : pgstat_info->t_counts.t_delta_live_tuples = 0;
2335 0 : pgstat_info->t_counts.t_delta_dead_tuples = 0;
2336 : }
2337 22 : pgstat_info->t_counts.t_delta_live_tuples +=
2338 11 : rec->tuples_inserted - rec->tuples_deleted;
2339 22 : pgstat_info->t_counts.t_delta_dead_tuples +=
2340 11 : rec->tuples_updated + rec->tuples_deleted;
2341 22 : pgstat_info->t_counts.t_changed_tuples +=
2342 22 : rec->tuples_inserted + rec->tuples_updated +
2343 11 : rec->tuples_deleted;
2344 11 : }
2345 :
2346 : /*
2347 : * 2PC processing routine for ROLLBACK PREPARED case.
2348 : *
2349 : * Load the saved counts into our local pgstats state, but treat them
2350 : * as aborted.
2351 : */
2352 : void
2353 3 : pgstat_twophase_postabort(TransactionId xid, uint16 info,
2354 : void *recdata, uint32 len)
2355 : {
2356 3 : TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
2357 : PgStat_TableStatus *pgstat_info;
2358 :
2359 : /* Find or create a tabstat entry for the rel */
2360 3 : pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
2361 :
2362 : /* Same math as in AtEOXact_PgStat, abort case */
2363 3 : if (rec->t_truncated)
2364 : {
2365 0 : rec->tuples_inserted = rec->inserted_pre_trunc;
2366 0 : rec->tuples_updated = rec->updated_pre_trunc;
2367 0 : rec->tuples_deleted = rec->deleted_pre_trunc;
2368 : }
2369 3 : pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
2370 3 : pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
2371 3 : pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
2372 6 : pgstat_info->t_counts.t_delta_dead_tuples +=
2373 3 : rec->tuples_inserted + rec->tuples_updated;
2374 3 : }
2375 :
2376 :
2377 : /* ----------
2378 : * pgstat_fetch_stat_dbentry() -
2379 : *
2380 : * Support function for the SQL-callable pgstat* functions. Returns
2381 : * the collected statistics for one database or NULL. NULL doesn't mean
2382 : * that the database doesn't exist, it is just not yet known by the
2383 : * collector, so the caller is better off to report ZERO instead.
2384 : * ----------
2385 : */
2386 : PgStat_StatDBEntry *
2387 121 : pgstat_fetch_stat_dbentry(Oid dbid)
2388 : {
2389 : /*
2390 : * If not done for this transaction, read the statistics collector stats
2391 : * file into some hash tables.
2392 : */
2393 121 : backend_read_statsfile();
2394 :
2395 : /*
2396 : * Lookup the requested database; return NULL if not found
2397 : */
2398 121 : return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2399 : (void *) &dbid,
2400 : HASH_FIND, NULL);
2401 : }
2402 :
2403 :
2404 : /* ----------
2405 : * pgstat_fetch_stat_tabentry() -
2406 : *
2407 : * Support function for the SQL-callable pgstat* functions. Returns
2408 : * the collected statistics for one table or NULL. NULL doesn't mean
2409 : * that the table doesn't exist, it is just not yet known by the
2410 : * collector, so the caller is better off to report ZERO instead.
2411 : * ----------
2412 : */
2413 : PgStat_StatTabEntry *
2414 92 : pgstat_fetch_stat_tabentry(Oid relid)
2415 : {
2416 : Oid dbid;
2417 : PgStat_StatDBEntry *dbentry;
2418 : PgStat_StatTabEntry *tabentry;
2419 :
2420 : /*
2421 : * If not done for this transaction, read the statistics collector stats
2422 : * file into some hash tables.
2423 : */
2424 92 : backend_read_statsfile();
2425 :
2426 : /*
2427 : * Lookup our database, then look in its table hash table.
2428 : */
2429 92 : dbid = MyDatabaseId;
2430 92 : dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2431 : (void *) &dbid,
2432 : HASH_FIND, NULL);
2433 92 : if (dbentry != NULL && dbentry->tables != NULL)
2434 : {
2435 92 : tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2436 : (void *) &relid,
2437 : HASH_FIND, NULL);
2438 92 : if (tabentry)
2439 88 : return tabentry;
2440 : }
2441 :
2442 : /*
2443 : * If we didn't find it, maybe it's a shared table.
2444 : */
2445 4 : dbid = InvalidOid;
2446 4 : dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2447 : (void *) &dbid,
2448 : HASH_FIND, NULL);
2449 4 : if (dbentry != NULL && dbentry->tables != NULL)
2450 : {
2451 4 : tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2452 : (void *) &relid,
2453 : HASH_FIND, NULL);
2454 4 : if (tabentry)
2455 0 : return tabentry;
2456 : }
2457 :
2458 4 : return NULL;
2459 : }
2460 :
2461 :
2462 : /* ----------
2463 : * pgstat_fetch_stat_funcentry() -
2464 : *
2465 : * Support function for the SQL-callable pgstat* functions. Returns
2466 : * the collected statistics for one function or NULL.
2467 : * ----------
2468 : */
2469 : PgStat_StatFuncEntry *
2470 0 : pgstat_fetch_stat_funcentry(Oid func_id)
2471 : {
2472 : PgStat_StatDBEntry *dbentry;
2473 0 : PgStat_StatFuncEntry *funcentry = NULL;
2474 :
2475 : /* load the stats file if needed */
2476 0 : backend_read_statsfile();
2477 :
2478 : /* Lookup our database, then find the requested function. */
2479 0 : dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
2480 0 : if (dbentry != NULL && dbentry->functions != NULL)
2481 : {
2482 0 : funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
2483 : (void *) &func_id,
2484 : HASH_FIND, NULL);
2485 : }
2486 :
2487 0 : return funcentry;
2488 : }
2489 :
2490 :
2491 : /* ----------
2492 : * pgstat_fetch_stat_beentry() -
2493 : *
2494 : * Support function for the SQL-callable pgstat* functions. Returns
2495 : * our local copy of the current-activity entry for one backend.
2496 : *
2497 : * NB: caller is responsible for a check if the user is permitted to see
2498 : * this info (especially the querystring).
2499 : * ----------
2500 : */
2501 : PgBackendStatus *
2502 0 : pgstat_fetch_stat_beentry(int beid)
2503 : {
2504 0 : pgstat_read_current_status();
2505 :
2506 0 : if (beid < 1 || beid > localNumBackends)
2507 0 : return NULL;
2508 :
2509 0 : return &localBackendStatusTable[beid - 1].backendStatus;
2510 : }
2511 :
2512 :
2513 : /* ----------
2514 : * pgstat_fetch_stat_local_beentry() -
2515 : *
2516 : * Like pgstat_fetch_stat_beentry() but with locally computed additions (like
2517 : * xid and xmin values of the backend)
2518 : *
2519 : * NB: caller is responsible for a check if the user is permitted to see
2520 : * this info (especially the querystring).
2521 : * ----------
2522 : */
2523 : LocalPgBackendStatus *
2524 0 : pgstat_fetch_stat_local_beentry(int beid)
2525 : {
2526 0 : pgstat_read_current_status();
2527 :
2528 0 : if (beid < 1 || beid > localNumBackends)
2529 0 : return NULL;
2530 :
2531 0 : return &localBackendStatusTable[beid - 1];
2532 : }
2533 :
2534 :
2535 : /* ----------
2536 : * pgstat_fetch_stat_numbackends() -
2537 : *
2538 : * Support function for the SQL-callable pgstat* functions. Returns
2539 : * the maximum current backend id.
2540 : * ----------
2541 : */
2542 : int
2543 0 : pgstat_fetch_stat_numbackends(void)
2544 : {
2545 0 : pgstat_read_current_status();
2546 :
2547 0 : return localNumBackends;
2548 : }
2549 :
2550 : /*
2551 : * ---------
2552 : * pgstat_fetch_stat_archiver() -
2553 : *
2554 : * Support function for the SQL-callable pgstat* functions. Returns
2555 : * a pointer to the archiver statistics struct.
2556 : * ---------
2557 : */
2558 : PgStat_ArchiverStats *
2559 0 : pgstat_fetch_stat_archiver(void)
2560 : {
2561 0 : backend_read_statsfile();
2562 :
2563 0 : return &archiverStats;
2564 : }
2565 :
2566 :
2567 : /*
2568 : * ---------
2569 : * pgstat_fetch_global() -
2570 : *
2571 : * Support function for the SQL-callable pgstat* functions. Returns
2572 : * a pointer to the global statistics struct.
2573 : * ---------
2574 : */
2575 : PgStat_GlobalStats *
2576 7 : pgstat_fetch_global(void)
2577 : {
2578 7 : backend_read_statsfile();
2579 :
2580 7 : return &globalStats;
2581 : }
2582 :
2583 :
2584 : /* ------------------------------------------------------------
2585 : * Functions for management of the shared-memory PgBackendStatus array
2586 : * ------------------------------------------------------------
2587 : */
2588 :
2589 : static PgBackendStatus *BackendStatusArray = NULL;
2590 : static PgBackendStatus *MyBEEntry = NULL;
2591 : static char *BackendAppnameBuffer = NULL;
2592 : static char *BackendClientHostnameBuffer = NULL;
2593 : static char *BackendActivityBuffer = NULL;
2594 : static Size BackendActivityBufferSize = 0;
2595 : #ifdef USE_SSL
2596 : static PgBackendSSLStatus *BackendSslStatusBuffer = NULL;
2597 : #endif
2598 :
2599 :
2600 : /*
2601 : * Report shared-memory space needed by CreateSharedBackendStatus.
2602 : */
2603 : Size
2604 5 : BackendStatusShmemSize(void)
2605 : {
2606 : Size size;
2607 :
2608 : /* BackendStatusArray: */
2609 5 : size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
2610 : /* BackendAppnameBuffer: */
2611 5 : size = add_size(size,
2612 5 : mul_size(NAMEDATALEN, NumBackendStatSlots));
2613 : /* BackendClientHostnameBuffer: */
2614 5 : size = add_size(size,
2615 5 : mul_size(NAMEDATALEN, NumBackendStatSlots));
2616 : /* BackendActivityBuffer: */
2617 5 : size = add_size(size,
2618 5 : mul_size(pgstat_track_activity_query_size, NumBackendStatSlots));
2619 : #ifdef USE_SSL
2620 : /* BackendSslStatusBuffer: */
2621 : size = add_size(size,
2622 : mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots));
2623 : #endif
2624 5 : return size;
2625 : }
2626 :
2627 : /*
2628 : * Initialize the shared status array and several string buffers
2629 : * during postmaster startup.
2630 : */
2631 : void
2632 5 : CreateSharedBackendStatus(void)
2633 : {
2634 : Size size;
2635 : bool found;
2636 : int i;
2637 : char *buffer;
2638 :
2639 : /* Create or attach to the shared array */
2640 5 : size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots);
2641 5 : BackendStatusArray = (PgBackendStatus *)
2642 5 : ShmemInitStruct("Backend Status Array", size, &found);
2643 :
2644 5 : if (!found)
2645 : {
2646 : /*
2647 : * We're the first - initialize.
2648 : */
2649 5 : MemSet(BackendStatusArray, 0, size);
2650 : }
2651 :
2652 : /* Create or attach to the shared appname buffer */
2653 5 : size = mul_size(NAMEDATALEN, MaxBackends);
2654 5 : BackendAppnameBuffer = (char *)
2655 5 : ShmemInitStruct("Backend Application Name Buffer", size, &found);
2656 :
2657 5 : if (!found)
2658 : {
2659 5 : MemSet(BackendAppnameBuffer, 0, size);
2660 :
2661 : /* Initialize st_appname pointers. */
2662 5 : buffer = BackendAppnameBuffer;
2663 600 : for (i = 0; i < NumBackendStatSlots; i++)
2664 : {
2665 595 : BackendStatusArray[i].st_appname = buffer;
2666 595 : buffer += NAMEDATALEN;
2667 : }
2668 : }
2669 :
2670 : /* Create or attach to the shared client hostname buffer */
2671 5 : size = mul_size(NAMEDATALEN, MaxBackends);
2672 5 : BackendClientHostnameBuffer = (char *)
2673 5 : ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
2674 :
2675 5 : if (!found)
2676 : {
2677 5 : MemSet(BackendClientHostnameBuffer, 0, size);
2678 :
2679 : /* Initialize st_clienthostname pointers. */
2680 5 : buffer = BackendClientHostnameBuffer;
2681 600 : for (i = 0; i < NumBackendStatSlots; i++)
2682 : {
2683 595 : BackendStatusArray[i].st_clienthostname = buffer;
2684 595 : buffer += NAMEDATALEN;
2685 : }
2686 : }
2687 :
2688 : /* Create or attach to the shared activity buffer */
2689 5 : BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size,
2690 5 : NumBackendStatSlots);
2691 5 : BackendActivityBuffer = (char *)
2692 5 : ShmemInitStruct("Backend Activity Buffer",
2693 : BackendActivityBufferSize,
2694 : &found);
2695 :
2696 5 : if (!found)
2697 : {
2698 5 : MemSet(BackendActivityBuffer, 0, size);
2699 :
2700 : /* Initialize st_activity pointers. */
2701 5 : buffer = BackendActivityBuffer;
2702 600 : for (i = 0; i < NumBackendStatSlots; i++)
2703 : {
2704 595 : BackendStatusArray[i].st_activity = buffer;
2705 595 : buffer += pgstat_track_activity_query_size;
2706 : }
2707 : }
2708 :
2709 : #ifdef USE_SSL
2710 : /* Create or attach to the shared SSL status buffer */
2711 : size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots);
2712 : BackendSslStatusBuffer = (PgBackendSSLStatus *)
2713 : ShmemInitStruct("Backend SSL Status Buffer", size, &found);
2714 :
2715 : if (!found)
2716 : {
2717 : PgBackendSSLStatus *ptr;
2718 :
2719 : MemSet(BackendSslStatusBuffer, 0, size);
2720 :
2721 : /* Initialize st_sslstatus pointers. */
2722 : ptr = BackendSslStatusBuffer;
2723 : for (i = 0; i < NumBackendStatSlots; i++)
2724 : {
2725 : BackendStatusArray[i].st_sslstatus = ptr;
2726 : ptr++;
2727 : }
2728 : }
2729 : #endif
2730 5 : }
2731 :
2732 :
2733 : /* ----------
2734 : * pgstat_initialize() -
2735 : *
2736 : * Initialize pgstats state, and set up our on-proc-exit hook.
2737 : * Called from InitPostgres and AuxiliaryProcessMain. For auxiliary process,
2738 : * MyBackendId is invalid. Otherwise, MyBackendId must be set,
2739 : * but we must not have started any transaction yet (since the
2740 : * exit hook must run after the last transaction exit).
2741 : * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
2742 : * ----------
2743 : */
2744 : void
2745 341 : pgstat_initialize(void)
2746 : {
2747 : /* Initialize MyBEEntry */
2748 341 : if (MyBackendId != InvalidBackendId)
2749 : {
2750 337 : Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
2751 337 : MyBEEntry = &BackendStatusArray[MyBackendId - 1];
2752 : }
2753 : else
2754 : {
2755 : /* Must be an auxiliary process */
2756 4 : Assert(MyAuxProcType != NotAnAuxProcess);
2757 :
2758 : /*
2759 : * Assign the MyBEEntry for an auxiliary process. Since it doesn't
2760 : * have a BackendId, the slot is statically allocated based on the
2761 : * auxiliary process type (MyAuxProcType). Backends use slots indexed
2762 : * in the range from 1 to MaxBackends (inclusive), so we use
2763 : * MaxBackends + AuxBackendType + 1 as the index of the slot for an
2764 : * auxiliary process.
2765 : */
2766 4 : MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType];
2767 : }
2768 :
2769 : /* Set up a process-exit hook to clean up */
2770 341 : on_shmem_exit(pgstat_beshutdown_hook, 0);
2771 341 : }
2772 :
2773 : /* ----------
2774 : * pgstat_bestart() -
2775 : *
2776 : * Initialize this backend's entry in the PgBackendStatus array.
2777 : * Called from InitPostgres.
2778 : *
2779 : * Apart from auxiliary processes, MyBackendId, MyDatabaseId,
2780 : * session userid, and application_name must be set for a
2781 : * backend (hence, this cannot be combined with pgstat_initialize).
2782 : * ----------
2783 : */
2784 : void
2785 341 : pgstat_bestart(void)
2786 : {
2787 : TimestampTz proc_start_timestamp;
2788 : SockAddr clientaddr;
2789 : volatile PgBackendStatus *beentry;
2790 :
2791 : /*
2792 : * To minimize the time spent modifying the PgBackendStatus entry, fetch
2793 : * all the needed data first.
2794 : *
2795 : * If we have a MyProcPort, use its session start time (for consistency,
2796 : * and to save a kernel call).
2797 : */
2798 341 : if (MyProcPort)
2799 216 : proc_start_timestamp = MyProcPort->SessionStartTime;
2800 : else
2801 125 : proc_start_timestamp = GetCurrentTimestamp();
2802 :
2803 : /*
2804 : * We may not have a MyProcPort (eg, if this is the autovacuum process).
2805 : * If so, use all-zeroes client address, which is dealt with specially in
2806 : * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
2807 : */
2808 341 : if (MyProcPort)
2809 216 : memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr));
2810 : else
2811 125 : MemSet(&clientaddr, 0, sizeof(clientaddr));
2812 :
2813 : /*
2814 : * Initialize my status entry, following the protocol of bumping
2815 : * st_changecount before and after; and make sure it's even afterwards. We
2816 : * use a volatile pointer here to ensure the compiler doesn't try to get
2817 : * cute.
2818 : */
2819 341 : beentry = MyBEEntry;
2820 :
2821 : /* pgstats state must be initialized from pgstat_initialize() */
2822 341 : Assert(beentry != NULL);
2823 :
2824 341 : if (MyBackendId != InvalidBackendId)
2825 : {
2826 337 : if (IsAutoVacuumLauncherProcess())
2827 : {
2828 : /* Autovacuum Launcher */
2829 1 : beentry->st_backendType = B_AUTOVAC_LAUNCHER;
2830 : }
2831 336 : else if (IsAutoVacuumWorkerProcess())
2832 : {
2833 : /* Autovacuum Worker */
2834 3 : beentry->st_backendType = B_AUTOVAC_WORKER;
2835 : }
2836 333 : else if (am_walsender)
2837 : {
2838 : /* Wal sender */
2839 0 : beentry->st_backendType = B_WAL_SENDER;
2840 : }
2841 333 : else if (IsBackgroundWorker)
2842 : {
2843 : /* bgworker */
2844 116 : beentry->st_backendType = B_BG_WORKER;
2845 : }
2846 : else
2847 : {
2848 : /* client-backend */
2849 217 : beentry->st_backendType = B_BACKEND;
2850 : }
2851 : }
2852 : else
2853 : {
2854 : /* Must be an auxiliary process */
2855 4 : Assert(MyAuxProcType != NotAnAuxProcess);
2856 4 : switch (MyAuxProcType)
2857 : {
2858 : case StartupProcess:
2859 1 : beentry->st_backendType = B_STARTUP;
2860 1 : break;
2861 : case BgWriterProcess:
2862 1 : beentry->st_backendType = B_BG_WRITER;
2863 1 : break;
2864 : case CheckpointerProcess:
2865 1 : beentry->st_backendType = B_CHECKPOINTER;
2866 1 : break;
2867 : case WalWriterProcess:
2868 1 : beentry->st_backendType = B_WAL_WRITER;
2869 1 : break;
2870 : case WalReceiverProcess:
2871 0 : beentry->st_backendType = B_WAL_RECEIVER;
2872 0 : break;
2873 : default:
2874 0 : elog(FATAL, "unrecognized process type: %d",
2875 : (int) MyAuxProcType);
2876 : proc_exit(1);
2877 : }
2878 : }
2879 :
2880 : do
2881 : {
2882 341 : pgstat_increment_changecount_before(beentry);
2883 341 : } while ((beentry->st_changecount & 1) == 0);
2884 :
2885 341 : beentry->st_procpid = MyProcPid;
2886 341 : beentry->st_proc_start_timestamp = proc_start_timestamp;
2887 341 : beentry->st_activity_start_timestamp = 0;
2888 341 : beentry->st_state_start_timestamp = 0;
2889 341 : beentry->st_xact_start_timestamp = 0;
2890 341 : beentry->st_databaseid = MyDatabaseId;
2891 :
2892 : /* We have userid for client-backends, wal-sender and bgworker processes */
2893 341 : if (beentry->st_backendType == B_BACKEND
2894 124 : || beentry->st_backendType == B_WAL_SENDER
2895 124 : || beentry->st_backendType == B_BG_WORKER)
2896 333 : beentry->st_userid = GetSessionUserId();
2897 : else
2898 8 : beentry->st_userid = InvalidOid;
2899 :
2900 341 : beentry->st_clientaddr = clientaddr;
2901 341 : if (MyProcPort && MyProcPort->remote_hostname)
2902 0 : strlcpy(beentry->st_clienthostname, MyProcPort->remote_hostname,
2903 : NAMEDATALEN);
2904 : else
2905 341 : beentry->st_clienthostname[0] = '\0';
2906 : #ifdef USE_SSL
2907 : if (MyProcPort && MyProcPort->ssl != NULL)
2908 : {
2909 : beentry->st_ssl = true;
2910 : beentry->st_sslstatus->ssl_bits = be_tls_get_cipher_bits(MyProcPort);
2911 : beentry->st_sslstatus->ssl_compression = be_tls_get_compression(MyProcPort);
2912 : be_tls_get_version(MyProcPort, beentry->st_sslstatus->ssl_version, NAMEDATALEN);
2913 : be_tls_get_cipher(MyProcPort, beentry->st_sslstatus->ssl_cipher, NAMEDATALEN);
2914 : be_tls_get_peerdn_name(MyProcPort, beentry->st_sslstatus->ssl_clientdn, NAMEDATALEN);
2915 : }
2916 : else
2917 : {
2918 : beentry->st_ssl = false;
2919 : }
2920 : #else
2921 341 : beentry->st_ssl = false;
2922 : #endif
2923 341 : beentry->st_state = STATE_UNDEFINED;
2924 341 : beentry->st_appname[0] = '\0';
2925 341 : beentry->st_activity[0] = '\0';
2926 : /* Also make sure the last byte in each string area is always 0 */
2927 341 : beentry->st_clienthostname[NAMEDATALEN - 1] = '\0';
2928 341 : beentry->st_appname[NAMEDATALEN - 1] = '\0';
2929 341 : beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
2930 341 : beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
2931 341 : beentry->st_progress_command_target = InvalidOid;
2932 :
2933 : /*
2934 : * we don't zero st_progress_param here to save cycles; nobody should
2935 : * examine it until st_progress_command has been set to something other
2936 : * than PROGRESS_COMMAND_INVALID
2937 : */
2938 :
2939 341 : pgstat_increment_changecount_after(beentry);
2940 :
2941 : /* Update app name to current GUC setting */
2942 341 : if (application_name)
2943 341 : pgstat_report_appname(application_name);
2944 341 : }
2945 :
2946 : /*
2947 : * Shut down a single backend's statistics reporting at process exit.
2948 : *
2949 : * Flush any remaining statistics counts out to the collector.
2950 : * Without this, operations triggered during backend exit (such as
2951 : * temp table deletions) won't be counted.
2952 : *
2953 : * Lastly, clear out our entry in the PgBackendStatus array.
2954 : */
2955 : static void
2956 341 : pgstat_beshutdown_hook(int code, Datum arg)
2957 : {
2958 341 : volatile PgBackendStatus *beentry = MyBEEntry;
2959 :
2960 : /*
2961 : * If we got as far as discovering our own database ID, we can report what
2962 : * we did to the collector. Otherwise, we'd be sending an invalid
2963 : * database ID, so forget it. (This means that accesses to pg_database
2964 : * during failed backend starts might never get counted.)
2965 : */
2966 341 : if (OidIsValid(MyDatabaseId))
2967 335 : pgstat_report_stat(true);
2968 :
2969 : /*
2970 : * Clear my status entry, following the protocol of bumping st_changecount
2971 : * before and after. We use a volatile pointer here to ensure the
2972 : * compiler doesn't try to get cute.
2973 : */
2974 341 : pgstat_increment_changecount_before(beentry);
2975 :
2976 341 : beentry->st_procpid = 0; /* mark invalid */
2977 :
2978 341 : pgstat_increment_changecount_after(beentry);
2979 341 : }
2980 :
2981 :
2982 : /* ----------
2983 : * pgstat_report_activity() -
2984 : *
2985 : * Called from tcop/postgres.c to report what the backend is actually doing
2986 : * (but note cmd_str can be NULL for certain cases).
2987 : *
2988 : * All updates of the status entry follow the protocol of bumping
2989 : * st_changecount before and after. We use a volatile pointer here to
2990 : * ensure the compiler doesn't try to get cute.
2991 : * ----------
2992 : */
2993 : void
2994 55032 : pgstat_report_activity(BackendState state, const char *cmd_str)
2995 : {
2996 55032 : volatile PgBackendStatus *beentry = MyBEEntry;
2997 : TimestampTz start_timestamp;
2998 : TimestampTz current_timestamp;
2999 55032 : int len = 0;
3000 :
3001 : TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
3002 :
3003 55032 : if (!beentry)
3004 0 : return;
3005 :
3006 55032 : if (!pgstat_track_activities)
3007 : {
3008 0 : if (beentry->st_state != STATE_DISABLED)
3009 : {
3010 0 : volatile PGPROC *proc = MyProc;
3011 :
3012 : /*
3013 : * track_activities is disabled, but we last reported a
3014 : * non-disabled state. As our final update, change the state and
3015 : * clear fields we will not be updating anymore.
3016 : */
3017 0 : pgstat_increment_changecount_before(beentry);
3018 0 : beentry->st_state = STATE_DISABLED;
3019 0 : beentry->st_state_start_timestamp = 0;
3020 0 : beentry->st_activity[0] = '\0';
3021 0 : beentry->st_activity_start_timestamp = 0;
3022 : /* st_xact_start_timestamp and wait_event_info are also disabled */
3023 0 : beentry->st_xact_start_timestamp = 0;
3024 0 : proc->wait_event_info = 0;
3025 0 : pgstat_increment_changecount_after(beentry);
3026 : }
3027 0 : return;
3028 : }
3029 :
3030 : /*
3031 : * To minimize the time spent modifying the entry, fetch all the needed
3032 : * data first.
3033 : */
3034 55032 : start_timestamp = GetCurrentStatementStartTimestamp();
3035 55032 : if (cmd_str != NULL)
3036 : {
3037 27230 : len = pg_mbcliplen(cmd_str, strlen(cmd_str),
3038 : pgstat_track_activity_query_size - 1);
3039 : }
3040 55032 : current_timestamp = GetCurrentTimestamp();
3041 :
3042 : /*
3043 : * Now update the status entry
3044 : */
3045 55032 : pgstat_increment_changecount_before(beentry);
3046 :
3047 55032 : beentry->st_state = state;
3048 55032 : beentry->st_state_start_timestamp = current_timestamp;
3049 :
3050 55032 : if (cmd_str != NULL)
3051 : {
3052 27230 : memcpy((char *) beentry->st_activity, cmd_str, len);
3053 27230 : beentry->st_activity[len] = '\0';
3054 27230 : beentry->st_activity_start_timestamp = start_timestamp;
3055 : }
3056 :
3057 55032 : pgstat_increment_changecount_after(beentry);
3058 : }
3059 :
3060 : /*-----------
3061 : * pgstat_progress_start_command() -
3062 : *
3063 : * Set st_progress_command (and st_progress_command_target) in own backend
3064 : * entry. Also, zero-initialize st_progress_param array.
3065 : *-----------
3066 : */
3067 : void
3068 390 : pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
3069 : {
3070 390 : volatile PgBackendStatus *beentry = MyBEEntry;
3071 :
3072 390 : if (!beentry || !pgstat_track_activities)
3073 390 : return;
3074 :
3075 390 : pgstat_increment_changecount_before(beentry);
3076 390 : beentry->st_progress_command = cmdtype;
3077 390 : beentry->st_progress_command_target = relid;
3078 390 : MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param));
3079 390 : pgstat_increment_changecount_after(beentry);
3080 : }
3081 :
3082 : /*-----------
3083 : * pgstat_progress_update_param() -
3084 : *
3085 : * Update index'th member in st_progress_param[] of own backend entry.
3086 : *-----------
3087 : */
3088 : void
3089 90431 : pgstat_progress_update_param(int index, int64 val)
3090 : {
3091 90431 : volatile PgBackendStatus *beentry = MyBEEntry;
3092 :
3093 90431 : Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM);
3094 :
3095 90431 : if (!beentry || !pgstat_track_activities)
3096 90431 : return;
3097 :
3098 90431 : pgstat_increment_changecount_before(beentry);
3099 90431 : beentry->st_progress_param[index] = val;
3100 90431 : pgstat_increment_changecount_after(beentry);
3101 : }
3102 :
3103 : /*-----------
3104 : * pgstat_progress_update_multi_param() -
3105 : *
3106 : * Update multiple members in st_progress_param[] of own backend entry.
3107 : * This is atomic; readers won't see intermediate states.
3108 : *-----------
3109 : */
3110 : void
3111 456 : pgstat_progress_update_multi_param(int nparam, const int *index,
3112 : const int64 *val)
3113 : {
3114 456 : volatile PgBackendStatus *beentry = MyBEEntry;
3115 : int i;
3116 :
3117 456 : if (!beentry || !pgstat_track_activities || nparam == 0)
3118 456 : return;
3119 :
3120 456 : pgstat_increment_changecount_before(beentry);
3121 :
3122 1758 : for (i = 0; i < nparam; ++i)
3123 : {
3124 1302 : Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM);
3125 :
3126 1302 : beentry->st_progress_param[index[i]] = val[i];
3127 : }
3128 :
3129 456 : pgstat_increment_changecount_after(beentry);
3130 : }
3131 :
3132 : /*-----------
3133 : * pgstat_progress_end_command() -
3134 : *
3135 : * Reset st_progress_command (and st_progress_command_target) in own backend
3136 : * entry. This signals the end of the command.
3137 : *-----------
3138 : */
3139 : void
3140 4019 : pgstat_progress_end_command(void)
3141 : {
3142 4019 : volatile PgBackendStatus *beentry = MyBEEntry;
3143 :
3144 4019 : if (!beentry)
3145 0 : return;
3146 4019 : if (!pgstat_track_activities
3147 0 : && beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
3148 0 : return;
3149 :
3150 4019 : pgstat_increment_changecount_before(beentry);
3151 4019 : beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
3152 4019 : beentry->st_progress_command_target = InvalidOid;
3153 4019 : pgstat_increment_changecount_after(beentry);
3154 : }
3155 :
3156 : /* ----------
3157 : * pgstat_report_appname() -
3158 : *
3159 : * Called to update our application name.
3160 : * ----------
3161 : */
3162 : void
3163 677 : pgstat_report_appname(const char *appname)
3164 : {
3165 677 : volatile PgBackendStatus *beentry = MyBEEntry;
3166 : int len;
3167 :
3168 677 : if (!beentry)
3169 682 : return;
3170 :
3171 : /* This should be unnecessary if GUC did its job, but be safe */
3172 672 : len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
3173 :
3174 : /*
3175 : * Update my status entry, following the protocol of bumping
3176 : * st_changecount before and after. We use a volatile pointer here to
3177 : * ensure the compiler doesn't try to get cute.
3178 : */
3179 672 : pgstat_increment_changecount_before(beentry);
3180 :
3181 672 : memcpy((char *) beentry->st_appname, appname, len);
3182 672 : beentry->st_appname[len] = '\0';
3183 :
3184 672 : pgstat_increment_changecount_after(beentry);
3185 : }
3186 :
3187 : /*
3188 : * Report current transaction start timestamp as the specified value.
3189 : * Zero means there is no active transaction.
3190 : */
3191 : void
3192 52436 : pgstat_report_xact_timestamp(TimestampTz tstamp)
3193 : {
3194 52436 : volatile PgBackendStatus *beentry = MyBEEntry;
3195 :
3196 52436 : if (!pgstat_track_activities || !beentry)
3197 52438 : return;
3198 :
3199 : /*
3200 : * Update my status entry, following the protocol of bumping
3201 : * st_changecount before and after. We use a volatile pointer here to
3202 : * ensure the compiler doesn't try to get cute.
3203 : */
3204 52434 : pgstat_increment_changecount_before(beentry);
3205 52434 : beentry->st_xact_start_timestamp = tstamp;
3206 52434 : pgstat_increment_changecount_after(beentry);
3207 : }
3208 :
3209 : /* ----------
3210 : * pgstat_read_current_status() -
3211 : *
3212 : * Copy the current contents of the PgBackendStatus array to local memory,
3213 : * if not already done in this transaction.
3214 : * ----------
3215 : */
3216 : static void
3217 0 : pgstat_read_current_status(void)
3218 : {
3219 : volatile PgBackendStatus *beentry;
3220 : LocalPgBackendStatus *localtable;
3221 : LocalPgBackendStatus *localentry;
3222 : char *localappname,
3223 : *localactivity;
3224 : #ifdef USE_SSL
3225 : PgBackendSSLStatus *localsslstatus;
3226 : #endif
3227 : int i;
3228 :
3229 0 : Assert(!pgStatRunningInCollector);
3230 0 : if (localBackendStatusTable)
3231 0 : return; /* already done */
3232 :
3233 0 : pgstat_setup_memcxt();
3234 :
3235 0 : localtable = (LocalPgBackendStatus *)
3236 0 : MemoryContextAlloc(pgStatLocalContext,
3237 : sizeof(LocalPgBackendStatus) * NumBackendStatSlots);
3238 0 : localappname = (char *)
3239 0 : MemoryContextAlloc(pgStatLocalContext,
3240 0 : NAMEDATALEN * NumBackendStatSlots);
3241 0 : localactivity = (char *)
3242 0 : MemoryContextAlloc(pgStatLocalContext,
3243 0 : pgstat_track_activity_query_size * NumBackendStatSlots);
3244 : #ifdef USE_SSL
3245 : localsslstatus = (PgBackendSSLStatus *)
3246 : MemoryContextAlloc(pgStatLocalContext,
3247 : sizeof(PgBackendSSLStatus) * NumBackendStatSlots);
3248 : #endif
3249 :
3250 0 : localNumBackends = 0;
3251 :
3252 0 : beentry = BackendStatusArray;
3253 0 : localentry = localtable;
3254 0 : for (i = 1; i <= NumBackendStatSlots; i++)
3255 : {
3256 : /*
3257 : * Follow the protocol of retrying if st_changecount changes while we
3258 : * copy the entry, or if it's odd. (The check for odd is needed to
3259 : * cover the case where we are able to completely copy the entry while
3260 : * the source backend is between increment steps.) We use a volatile
3261 : * pointer here to ensure the compiler doesn't try to get cute.
3262 : */
3263 : for (;;)
3264 : {
3265 : int before_changecount;
3266 : int after_changecount;
3267 :
3268 0 : pgstat_save_changecount_before(beentry, before_changecount);
3269 :
3270 0 : localentry->backendStatus.st_procpid = beentry->st_procpid;
3271 0 : if (localentry->backendStatus.st_procpid > 0)
3272 : {
3273 0 : memcpy(&localentry->backendStatus, (char *) beentry, sizeof(PgBackendStatus));
3274 :
3275 : /*
3276 : * strcpy is safe even if the string is modified concurrently,
3277 : * because there's always a \0 at the end of the buffer.
3278 : */
3279 0 : strcpy(localappname, (char *) beentry->st_appname);
3280 0 : localentry->backendStatus.st_appname = localappname;
3281 0 : strcpy(localactivity, (char *) beentry->st_activity);
3282 0 : localentry->backendStatus.st_activity = localactivity;
3283 0 : localentry->backendStatus.st_ssl = beentry->st_ssl;
3284 : #ifdef USE_SSL
3285 : if (beentry->st_ssl)
3286 : {
3287 : memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus));
3288 : localentry->backendStatus.st_sslstatus = localsslstatus;
3289 : }
3290 : #endif
3291 : }
3292 :
3293 0 : pgstat_save_changecount_after(beentry, after_changecount);
3294 0 : if (before_changecount == after_changecount &&
3295 0 : (before_changecount & 1) == 0)
3296 0 : break;
3297 :
3298 : /* Make sure we can break out of loop if stuck... */
3299 0 : CHECK_FOR_INTERRUPTS();
3300 0 : }
3301 :
3302 0 : beentry++;
3303 : /* Only valid entries get included into the local array */
3304 0 : if (localentry->backendStatus.st_procpid > 0)
3305 : {
3306 0 : BackendIdGetTransactionIds(i,
3307 : &localentry->backend_xid,
3308 : &localentry->backend_xmin);
3309 :
3310 0 : localentry++;
3311 0 : localappname += NAMEDATALEN;
3312 0 : localactivity += pgstat_track_activity_query_size;
3313 : #ifdef USE_SSL
3314 : localsslstatus++;
3315 : #endif
3316 0 : localNumBackends++;
3317 : }
3318 : }
3319 :
3320 : /* Set the pointer only after completion of a valid table */
3321 0 : localBackendStatusTable = localtable;
3322 : }
3323 :
3324 : /* ----------
3325 : * pgstat_get_wait_event_type() -
3326 : *
3327 : * Return a string representing the current wait event type, backend is
3328 : * waiting on.
3329 : */
3330 : const char *
3331 0 : pgstat_get_wait_event_type(uint32 wait_event_info)
3332 : {
3333 : uint32 classId;
3334 : const char *event_type;
3335 :
3336 : /* report process as not waiting. */
3337 0 : if (wait_event_info == 0)
3338 0 : return NULL;
3339 :
3340 0 : classId = wait_event_info & 0xFF000000;
3341 :
3342 0 : switch (classId)
3343 : {
3344 : case PG_WAIT_LWLOCK:
3345 0 : event_type = "LWLock";
3346 0 : break;
3347 : case PG_WAIT_LOCK:
3348 0 : event_type = "Lock";
3349 0 : break;
3350 : case PG_WAIT_BUFFER_PIN:
3351 0 : event_type = "BufferPin";
3352 0 : break;
3353 : case PG_WAIT_ACTIVITY:
3354 0 : event_type = "Activity";
3355 0 : break;
3356 : case PG_WAIT_CLIENT:
3357 0 : event_type = "Client";
3358 0 : break;
3359 : case PG_WAIT_EXTENSION:
3360 0 : event_type = "Extension";
3361 0 : break;
3362 : case PG_WAIT_IPC:
3363 0 : event_type = "IPC";
3364 0 : break;
3365 : case PG_WAIT_TIMEOUT:
3366 0 : event_type = "Timeout";
3367 0 : break;
3368 : case PG_WAIT_IO:
3369 0 : event_type = "IO";
3370 0 : break;
3371 : default:
3372 0 : event_type = "???";
3373 0 : break;
3374 : }
3375 :
3376 0 : return event_type;
3377 : }
3378 :
3379 : /* ----------
3380 : * pgstat_get_wait_event() -
3381 : *
3382 : * Return a string representing the current wait event, backend is
3383 : * waiting on.
3384 : */
3385 : const char *
3386 0 : pgstat_get_wait_event(uint32 wait_event_info)
3387 : {
3388 : uint32 classId;
3389 : uint16 eventId;
3390 : const char *event_name;
3391 :
3392 : /* report process as not waiting. */
3393 0 : if (wait_event_info == 0)
3394 0 : return NULL;
3395 :
3396 0 : classId = wait_event_info & 0xFF000000;
3397 0 : eventId = wait_event_info & 0x0000FFFF;
3398 :
3399 0 : switch (classId)
3400 : {
3401 : case PG_WAIT_LWLOCK:
3402 0 : event_name = GetLWLockIdentifier(classId, eventId);
3403 0 : break;
3404 : case PG_WAIT_LOCK:
3405 0 : event_name = GetLockNameFromTagType(eventId);
3406 0 : break;
3407 : case PG_WAIT_BUFFER_PIN:
3408 0 : event_name = "BufferPin";
3409 0 : break;
3410 : case PG_WAIT_ACTIVITY:
3411 : {
3412 0 : WaitEventActivity w = (WaitEventActivity) wait_event_info;
3413 :
3414 0 : event_name = pgstat_get_wait_activity(w);
3415 0 : break;
3416 : }
3417 : case PG_WAIT_CLIENT:
3418 : {
3419 0 : WaitEventClient w = (WaitEventClient) wait_event_info;
3420 :
3421 0 : event_name = pgstat_get_wait_client(w);
3422 0 : break;
3423 : }
3424 : case PG_WAIT_EXTENSION:
3425 0 : event_name = "Extension";
3426 0 : break;
3427 : case PG_WAIT_IPC:
3428 : {
3429 0 : WaitEventIPC w = (WaitEventIPC) wait_event_info;
3430 :
3431 0 : event_name = pgstat_get_wait_ipc(w);
3432 0 : break;
3433 : }
3434 : case PG_WAIT_TIMEOUT:
3435 : {
3436 0 : WaitEventTimeout w = (WaitEventTimeout) wait_event_info;
3437 :
3438 0 : event_name = pgstat_get_wait_timeout(w);
3439 0 : break;
3440 : }
3441 : case PG_WAIT_IO:
3442 : {
3443 0 : WaitEventIO w = (WaitEventIO) wait_event_info;
3444 :
3445 0 : event_name = pgstat_get_wait_io(w);
3446 0 : break;
3447 : }
3448 : default:
3449 0 : event_name = "unknown wait event";
3450 0 : break;
3451 : }
3452 :
3453 0 : return event_name;
3454 : }
3455 :
3456 : /* ----------
3457 : * pgstat_get_wait_activity() -
3458 : *
3459 : * Convert WaitEventActivity to string.
3460 : * ----------
3461 : */
3462 : static const char *
3463 0 : pgstat_get_wait_activity(WaitEventActivity w)
3464 : {
3465 0 : const char *event_name = "unknown wait event";
3466 :
3467 0 : switch (w)
3468 : {
3469 : case WAIT_EVENT_ARCHIVER_MAIN:
3470 0 : event_name = "ArchiverMain";
3471 0 : break;
3472 : case WAIT_EVENT_AUTOVACUUM_MAIN:
3473 0 : event_name = "AutoVacuumMain";
3474 0 : break;
3475 : case WAIT_EVENT_BGWRITER_HIBERNATE:
3476 0 : event_name = "BgWriterHibernate";
3477 0 : break;
3478 : case WAIT_EVENT_BGWRITER_MAIN:
3479 0 : event_name = "BgWriterMain";
3480 0 : break;
3481 : case WAIT_EVENT_CHECKPOINTER_MAIN:
3482 0 : event_name = "CheckpointerMain";
3483 0 : break;
3484 : case WAIT_EVENT_LOGICAL_LAUNCHER_MAIN:
3485 0 : event_name = "LogicalLauncherMain";
3486 0 : break;
3487 : case WAIT_EVENT_LOGICAL_APPLY_MAIN:
3488 0 : event_name = "LogicalApplyMain";
3489 0 : break;
3490 : case WAIT_EVENT_PGSTAT_MAIN:
3491 0 : event_name = "PgStatMain";
3492 0 : break;
3493 : case WAIT_EVENT_RECOVERY_WAL_ALL:
3494 0 : event_name = "RecoveryWalAll";
3495 0 : break;
3496 : case WAIT_EVENT_RECOVERY_WAL_STREAM:
3497 0 : event_name = "RecoveryWalStream";
3498 0 : break;
3499 : case WAIT_EVENT_SYSLOGGER_MAIN:
3500 0 : event_name = "SysLoggerMain";
3501 0 : break;
3502 : case WAIT_EVENT_WAL_RECEIVER_MAIN:
3503 0 : event_name = "WalReceiverMain";
3504 0 : break;
3505 : case WAIT_EVENT_WAL_SENDER_MAIN:
3506 0 : event_name = "WalSenderMain";
3507 0 : break;
3508 : case WAIT_EVENT_WAL_WRITER_MAIN:
3509 0 : event_name = "WalWriterMain";
3510 0 : break;
3511 : /* no default case, so that compiler will warn */
3512 : }
3513 :
3514 0 : return event_name;
3515 : }
3516 :
3517 : /* ----------
3518 : * pgstat_get_wait_client() -
3519 : *
3520 : * Convert WaitEventClient to string.
3521 : * ----------
3522 : */
3523 : static const char *
3524 0 : pgstat_get_wait_client(WaitEventClient w)
3525 : {
3526 0 : const char *event_name = "unknown wait event";
3527 :
3528 0 : switch (w)
3529 : {
3530 : case WAIT_EVENT_CLIENT_READ:
3531 0 : event_name = "ClientRead";
3532 0 : break;
3533 : case WAIT_EVENT_CLIENT_WRITE:
3534 0 : event_name = "ClientWrite";
3535 0 : break;
3536 : case WAIT_EVENT_LIBPQWALRECEIVER_CONNECT:
3537 0 : event_name = "LibPQWalReceiverConnect";
3538 0 : break;
3539 : case WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE:
3540 0 : event_name = "LibPQWalReceiverReceive";
3541 0 : break;
3542 : case WAIT_EVENT_SSL_OPEN_SERVER:
3543 0 : event_name = "SSLOpenServer";
3544 0 : break;
3545 : case WAIT_EVENT_WAL_RECEIVER_WAIT_START:
3546 0 : event_name = "WalReceiverWaitStart";
3547 0 : break;
3548 : case WAIT_EVENT_WAL_SENDER_WAIT_WAL:
3549 0 : event_name = "WalSenderWaitForWAL";
3550 0 : break;
3551 : case WAIT_EVENT_WAL_SENDER_WRITE_DATA:
3552 0 : event_name = "WalSenderWriteData";
3553 0 : break;
3554 : /* no default case, so that compiler will warn */
3555 : }
3556 :
3557 0 : return event_name;
3558 : }
3559 :
3560 : /* ----------
3561 : * pgstat_get_wait_ipc() -
3562 : *
3563 : * Convert WaitEventIPC to string.
3564 : * ----------
3565 : */
3566 : static const char *
3567 0 : pgstat_get_wait_ipc(WaitEventIPC w)
3568 : {
3569 0 : const char *event_name = "unknown wait event";
3570 :
3571 0 : switch (w)
3572 : {
3573 : case WAIT_EVENT_BGWORKER_SHUTDOWN:
3574 0 : event_name = "BgWorkerShutdown";
3575 0 : break;
3576 : case WAIT_EVENT_BGWORKER_STARTUP:
3577 0 : event_name = "BgWorkerStartup";
3578 0 : break;
3579 : case WAIT_EVENT_BTREE_PAGE:
3580 0 : event_name = "BtreePage";
3581 0 : break;
3582 : case WAIT_EVENT_EXECUTE_GATHER:
3583 0 : event_name = "ExecuteGather";
3584 0 : break;
3585 : case WAIT_EVENT_LOGICAL_SYNC_DATA:
3586 0 : event_name = "LogicalSyncData";
3587 0 : break;
3588 : case WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE:
3589 0 : event_name = "LogicalSyncStateChange";
3590 0 : break;
3591 : case WAIT_EVENT_MQ_INTERNAL:
3592 0 : event_name = "MessageQueueInternal";
3593 0 : break;
3594 : case WAIT_EVENT_MQ_PUT_MESSAGE:
3595 0 : event_name = "MessageQueuePutMessage";
3596 0 : break;
3597 : case WAIT_EVENT_MQ_RECEIVE:
3598 0 : event_name = "MessageQueueReceive";
3599 0 : break;
3600 : case WAIT_EVENT_MQ_SEND:
3601 0 : event_name = "MessageQueueSend";
3602 0 : break;
3603 : case WAIT_EVENT_PARALLEL_FINISH:
3604 0 : event_name = "ParallelFinish";
3605 0 : break;
3606 : case WAIT_EVENT_PARALLEL_BITMAP_SCAN:
3607 0 : event_name = "ParallelBitmapScan";
3608 0 : break;
3609 : case WAIT_EVENT_PROCARRAY_GROUP_UPDATE:
3610 0 : event_name = "ProcArrayGroupUpdate";
3611 0 : break;
3612 : case WAIT_EVENT_CLOG_GROUP_UPDATE:
3613 0 : event_name = "ClogGroupUpdate";
3614 0 : break;
3615 : case WAIT_EVENT_REPLICATION_ORIGIN_DROP:
3616 0 : event_name = "ReplicationOriginDrop";
3617 0 : break;
3618 : case WAIT_EVENT_REPLICATION_SLOT_DROP:
3619 0 : event_name = "ReplicationSlotDrop";
3620 0 : break;
3621 : case WAIT_EVENT_SAFE_SNAPSHOT:
3622 0 : event_name = "SafeSnapshot";
3623 0 : break;
3624 : case WAIT_EVENT_SYNC_REP:
3625 0 : event_name = "SyncRep";
3626 0 : break;
3627 : /* no default case, so that compiler will warn */
3628 : }
3629 :
3630 0 : return event_name;
3631 : }
3632 :
3633 : /* ----------
3634 : * pgstat_get_wait_timeout() -
3635 : *
3636 : * Convert WaitEventTimeout to string.
3637 : * ----------
3638 : */
3639 : static const char *
3640 0 : pgstat_get_wait_timeout(WaitEventTimeout w)
3641 : {
3642 0 : const char *event_name = "unknown wait event";
3643 :
3644 0 : switch (w)
3645 : {
3646 : case WAIT_EVENT_BASE_BACKUP_THROTTLE:
3647 0 : event_name = "BaseBackupThrottle";
3648 0 : break;
3649 : case WAIT_EVENT_PG_SLEEP:
3650 0 : event_name = "PgSleep";
3651 0 : break;
3652 : case WAIT_EVENT_RECOVERY_APPLY_DELAY:
3653 0 : event_name = "RecoveryApplyDelay";
3654 0 : break;
3655 : /* no default case, so that compiler will warn */
3656 : }
3657 :
3658 0 : return event_name;
3659 : }
3660 :
3661 : /* ----------
3662 : * pgstat_get_wait_io() -
3663 : *
3664 : * Convert WaitEventIO to string.
3665 : * ----------
3666 : */
3667 : static const char *
3668 0 : pgstat_get_wait_io(WaitEventIO w)
3669 : {
3670 0 : const char *event_name = "unknown wait event";
3671 :
3672 0 : switch (w)
3673 : {
3674 : case WAIT_EVENT_BUFFILE_READ:
3675 0 : event_name = "BufFileRead";
3676 0 : break;
3677 : case WAIT_EVENT_BUFFILE_WRITE:
3678 0 : event_name = "BufFileWrite";
3679 0 : break;
3680 : case WAIT_EVENT_CONTROL_FILE_READ:
3681 0 : event_name = "ControlFileRead";
3682 0 : break;
3683 : case WAIT_EVENT_CONTROL_FILE_SYNC:
3684 0 : event_name = "ControlFileSync";
3685 0 : break;
3686 : case WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE:
3687 0 : event_name = "ControlFileSyncUpdate";
3688 0 : break;
3689 : case WAIT_EVENT_CONTROL_FILE_WRITE:
3690 0 : event_name = "ControlFileWrite";
3691 0 : break;
3692 : case WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE:
3693 0 : event_name = "ControlFileWriteUpdate";
3694 0 : break;
3695 : case WAIT_EVENT_COPY_FILE_READ:
3696 0 : event_name = "CopyFileRead";
3697 0 : break;
3698 : case WAIT_EVENT_COPY_FILE_WRITE:
3699 0 : event_name = "CopyFileWrite";
3700 0 : break;
3701 : case WAIT_EVENT_DATA_FILE_EXTEND:
3702 0 : event_name = "DataFileExtend";
3703 0 : break;
3704 : case WAIT_EVENT_DATA_FILE_FLUSH:
3705 0 : event_name = "DataFileFlush";
3706 0 : break;
3707 : case WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC:
3708 0 : event_name = "DataFileImmediateSync";
3709 0 : break;
3710 : case WAIT_EVENT_DATA_FILE_PREFETCH:
3711 0 : event_name = "DataFilePrefetch";
3712 0 : break;
3713 : case WAIT_EVENT_DATA_FILE_READ:
3714 0 : event_name = "DataFileRead";
3715 0 : break;
3716 : case WAIT_EVENT_DATA_FILE_SYNC:
3717 0 : event_name = "DataFileSync";
3718 0 : break;
3719 : case WAIT_EVENT_DATA_FILE_TRUNCATE:
3720 0 : event_name = "DataFileTruncate";
3721 0 : break;
3722 : case WAIT_EVENT_DATA_FILE_WRITE:
3723 0 : event_name = "DataFileWrite";
3724 0 : break;
3725 : case WAIT_EVENT_DSM_FILL_ZERO_WRITE:
3726 0 : event_name = "DSMFillZeroWrite";
3727 0 : break;
3728 : case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ:
3729 0 : event_name = "LockFileAddToDataDirRead";
3730 0 : break;
3731 : case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC:
3732 0 : event_name = "LockFileAddToDataDirSync";
3733 0 : break;
3734 : case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE:
3735 0 : event_name = "LockFileAddToDataDirWrite";
3736 0 : break;
3737 : case WAIT_EVENT_LOCK_FILE_CREATE_READ:
3738 0 : event_name = "LockFileCreateRead";
3739 0 : break;
3740 : case WAIT_EVENT_LOCK_FILE_CREATE_SYNC:
3741 0 : event_name = "LockFileCreateSync";
3742 0 : break;
3743 : case WAIT_EVENT_LOCK_FILE_CREATE_WRITE:
3744 0 : event_name = "LockFileCreateWRITE";
3745 0 : break;
3746 : case WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ:
3747 0 : event_name = "LockFileReCheckDataDirRead";
3748 0 : break;
3749 : case WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC:
3750 0 : event_name = "LogicalRewriteCheckpointSync";
3751 0 : break;
3752 : case WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC:
3753 0 : event_name = "LogicalRewriteMappingSync";
3754 0 : break;
3755 : case WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE:
3756 0 : event_name = "LogicalRewriteMappingWrite";
3757 0 : break;
3758 : case WAIT_EVENT_LOGICAL_REWRITE_SYNC:
3759 0 : event_name = "LogicalRewriteSync";
3760 0 : break;
3761 : case WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE:
3762 0 : event_name = "LogicalRewriteTruncate";
3763 0 : break;
3764 : case WAIT_EVENT_LOGICAL_REWRITE_WRITE:
3765 0 : event_name = "LogicalRewriteWrite";
3766 0 : break;
3767 : case WAIT_EVENT_RELATION_MAP_READ:
3768 0 : event_name = "RelationMapRead";
3769 0 : break;
3770 : case WAIT_EVENT_RELATION_MAP_SYNC:
3771 0 : event_name = "RelationMapSync";
3772 0 : break;
3773 : case WAIT_EVENT_RELATION_MAP_WRITE:
3774 0 : event_name = "RelationMapWrite";
3775 0 : break;
3776 : case WAIT_EVENT_REORDER_BUFFER_READ:
3777 0 : event_name = "ReorderBufferRead";
3778 0 : break;
3779 : case WAIT_EVENT_REORDER_BUFFER_WRITE:
3780 0 : event_name = "ReorderBufferWrite";
3781 0 : break;
3782 : case WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ:
3783 0 : event_name = "ReorderLogicalMappingRead";
3784 0 : break;
3785 : case WAIT_EVENT_REPLICATION_SLOT_READ:
3786 0 : event_name = "ReplicationSlotRead";
3787 0 : break;
3788 : case WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC:
3789 0 : event_name = "ReplicationSlotRestoreSync";
3790 0 : break;
3791 : case WAIT_EVENT_REPLICATION_SLOT_SYNC:
3792 0 : event_name = "ReplicationSlotSync";
3793 0 : break;
3794 : case WAIT_EVENT_REPLICATION_SLOT_WRITE:
3795 0 : event_name = "ReplicationSlotWrite";
3796 0 : break;
3797 : case WAIT_EVENT_SLRU_FLUSH_SYNC:
3798 0 : event_name = "SLRUFlushSync";
3799 0 : break;
3800 : case WAIT_EVENT_SLRU_READ:
3801 0 : event_name = "SLRURead";
3802 0 : break;
3803 : case WAIT_EVENT_SLRU_SYNC:
3804 0 : event_name = "SLRUSync";
3805 0 : break;
3806 : case WAIT_EVENT_SLRU_WRITE:
3807 0 : event_name = "SLRUWrite";
3808 0 : break;
3809 : case WAIT_EVENT_SNAPBUILD_READ:
3810 0 : event_name = "SnapbuildRead";
3811 0 : break;
3812 : case WAIT_EVENT_SNAPBUILD_SYNC:
3813 0 : event_name = "SnapbuildSync";
3814 0 : break;
3815 : case WAIT_EVENT_SNAPBUILD_WRITE:
3816 0 : event_name = "SnapbuildWrite";
3817 0 : break;
3818 : case WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC:
3819 0 : event_name = "TimelineHistoryFileSync";
3820 0 : break;
3821 : case WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE:
3822 0 : event_name = "TimelineHistoryFileWrite";
3823 0 : break;
3824 : case WAIT_EVENT_TIMELINE_HISTORY_READ:
3825 0 : event_name = "TimelineHistoryRead";
3826 0 : break;
3827 : case WAIT_EVENT_TIMELINE_HISTORY_SYNC:
3828 0 : event_name = "TimelineHistorySync";
3829 0 : break;
3830 : case WAIT_EVENT_TIMELINE_HISTORY_WRITE:
3831 0 : event_name = "TimelineHistoryWrite";
3832 0 : break;
3833 : case WAIT_EVENT_TWOPHASE_FILE_READ:
3834 0 : event_name = "TwophaseFileRead";
3835 0 : break;
3836 : case WAIT_EVENT_TWOPHASE_FILE_SYNC:
3837 0 : event_name = "TwophaseFileSync";
3838 0 : break;
3839 : case WAIT_EVENT_TWOPHASE_FILE_WRITE:
3840 0 : event_name = "TwophaseFileWrite";
3841 0 : break;
3842 : case WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ:
3843 0 : event_name = "WALSenderTimelineHistoryRead";
3844 0 : break;
3845 : case WAIT_EVENT_WAL_BOOTSTRAP_SYNC:
3846 0 : event_name = "WALBootstrapSync";
3847 0 : break;
3848 : case WAIT_EVENT_WAL_BOOTSTRAP_WRITE:
3849 0 : event_name = "WALBootstrapWrite";
3850 0 : break;
3851 : case WAIT_EVENT_WAL_COPY_READ:
3852 0 : event_name = "WALCopyRead";
3853 0 : break;
3854 : case WAIT_EVENT_WAL_COPY_SYNC:
3855 0 : event_name = "WALCopySync";
3856 0 : break;
3857 : case WAIT_EVENT_WAL_COPY_WRITE:
3858 0 : event_name = "WALCopyWrite";
3859 0 : break;
3860 : case WAIT_EVENT_WAL_INIT_SYNC:
3861 0 : event_name = "WALInitSync";
3862 0 : break;
3863 : case WAIT_EVENT_WAL_INIT_WRITE:
3864 0 : event_name = "WALInitWrite";
3865 0 : break;
3866 : case WAIT_EVENT_WAL_READ:
3867 0 : event_name = "WALRead";
3868 0 : break;
3869 : case WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN:
3870 0 : event_name = "WALSyncMethodAssign";
3871 0 : break;
3872 : case WAIT_EVENT_WAL_WRITE:
3873 0 : event_name = "WALWrite";
3874 0 : break;
3875 :
3876 : /* no default case, so that compiler will warn */
3877 : }
3878 :
3879 0 : return event_name;
3880 : }
3881 :
3882 :
3883 : /* ----------
3884 : * pgstat_get_backend_current_activity() -
3885 : *
3886 : * Return a string representing the current activity of the backend with
3887 : * the specified PID. This looks directly at the BackendStatusArray,
3888 : * and so will provide current information regardless of the age of our
3889 : * transaction's snapshot of the status array.
3890 : *
3891 : * It is the caller's responsibility to invoke this only for backends whose
3892 : * state is expected to remain stable while the result is in use. The
3893 : * only current use is in deadlock reporting, where we can expect that
3894 : * the target backend is blocked on a lock. (There are corner cases
3895 : * where the target's wait could get aborted while we are looking at it,
3896 : * but the very worst consequence is to return a pointer to a string
3897 : * that's been changed, so we won't worry too much.)
3898 : *
3899 : * Note: return strings for special cases match pg_stat_get_backend_activity.
3900 : * ----------
3901 : */
3902 : const char *
3903 0 : pgstat_get_backend_current_activity(int pid, bool checkUser)
3904 : {
3905 : PgBackendStatus *beentry;
3906 : int i;
3907 :
3908 0 : beentry = BackendStatusArray;
3909 0 : for (i = 1; i <= MaxBackends; i++)
3910 : {
3911 : /*
3912 : * Although we expect the target backend's entry to be stable, that
3913 : * doesn't imply that anyone else's is. To avoid identifying the
3914 : * wrong backend, while we check for a match to the desired PID we
3915 : * must follow the protocol of retrying if st_changecount changes
3916 : * while we examine the entry, or if it's odd. (This might be
3917 : * unnecessary, since fetching or storing an int is almost certainly
3918 : * atomic, but let's play it safe.) We use a volatile pointer here to
3919 : * ensure the compiler doesn't try to get cute.
3920 : */
3921 0 : volatile PgBackendStatus *vbeentry = beentry;
3922 : bool found;
3923 :
3924 : for (;;)
3925 : {
3926 : int before_changecount;
3927 : int after_changecount;
3928 :
3929 0 : pgstat_save_changecount_before(vbeentry, before_changecount);
3930 :
3931 0 : found = (vbeentry->st_procpid == pid);
3932 :
3933 0 : pgstat_save_changecount_after(vbeentry, after_changecount);
3934 :
3935 0 : if (before_changecount == after_changecount &&
3936 0 : (before_changecount & 1) == 0)
3937 0 : break;
3938 :
3939 : /* Make sure we can break out of loop if stuck... */
3940 0 : CHECK_FOR_INTERRUPTS();
3941 0 : }
3942 :
3943 0 : if (found)
3944 : {
3945 : /* Now it is safe to use the non-volatile pointer */
3946 0 : if (checkUser && !superuser() && beentry->st_userid != GetUserId())
3947 0 : return "<insufficient privilege>";
3948 0 : else if (*(beentry->st_activity) == '\0')
3949 0 : return "<command string not enabled>";
3950 : else
3951 0 : return beentry->st_activity;
3952 : }
3953 :
3954 0 : beentry++;
3955 : }
3956 :
3957 : /* If we get here, caller is in error ... */
3958 0 : return "<backend information not available>";
3959 : }
3960 :
3961 : /* ----------
3962 : * pgstat_get_crashed_backend_activity() -
3963 : *
3964 : * Return a string representing the current activity of the backend with
3965 : * the specified PID. Like the function above, but reads shared memory with
3966 : * the expectation that it may be corrupt. On success, copy the string
3967 : * into the "buffer" argument and return that pointer. On failure,
3968 : * return NULL.
3969 : *
3970 : * This function is only intended to be used by the postmaster to report the
3971 : * query that crashed a backend. In particular, no attempt is made to
3972 : * follow the correct concurrency protocol when accessing the
3973 : * BackendStatusArray. But that's OK, in the worst case we'll return a
3974 : * corrupted message. We also must take care not to trip on ereport(ERROR).
3975 : * ----------
3976 : */
3977 : const char *
3978 2 : pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
3979 : {
3980 : volatile PgBackendStatus *beentry;
3981 : int i;
3982 :
3983 2 : beentry = BackendStatusArray;
3984 :
3985 : /*
3986 : * We probably shouldn't get here before shared memory has been set up,
3987 : * but be safe.
3988 : */
3989 2 : if (beentry == NULL || BackendActivityBuffer == NULL)
3990 0 : return NULL;
3991 :
3992 226 : for (i = 1; i <= MaxBackends; i++)
3993 : {
3994 224 : if (beentry->st_procpid == pid)
3995 : {
3996 : /* Read pointer just once, so it can't change after validation */
3997 0 : const char *activity = beentry->st_activity;
3998 : const char *activity_last;
3999 :
4000 : /*
4001 : * We mustn't access activity string before we verify that it
4002 : * falls within the BackendActivityBuffer. To make sure that the
4003 : * entire string including its ending is contained within the
4004 : * buffer, subtract one activity length from the buffer size.
4005 : */
4006 0 : activity_last = BackendActivityBuffer + BackendActivityBufferSize
4007 0 : - pgstat_track_activity_query_size;
4008 :
4009 0 : if (activity < BackendActivityBuffer ||
4010 : activity > activity_last)
4011 0 : return NULL;
4012 :
4013 : /* If no string available, no point in a report */
4014 0 : if (activity[0] == '\0')
4015 0 : return NULL;
4016 :
4017 : /*
4018 : * Copy only ASCII-safe characters so we don't run into encoding
4019 : * problems when reporting the message; and be sure not to run off
4020 : * the end of memory.
4021 : */
4022 0 : ascii_safe_strlcpy(buffer, activity,
4023 0 : Min(buflen, pgstat_track_activity_query_size));
4024 :
4025 0 : return buffer;
4026 : }
4027 :
4028 224 : beentry++;
4029 : }
4030 :
4031 : /* PID not found */
4032 2 : return NULL;
4033 : }
4034 :
4035 : const char *
4036 0 : pgstat_get_backend_desc(BackendType backendType)
4037 : {
4038 0 : const char *backendDesc = "unknown process type";
4039 :
4040 0 : switch (backendType)
4041 : {
4042 : case B_AUTOVAC_LAUNCHER:
4043 0 : backendDesc = "autovacuum launcher";
4044 0 : break;
4045 : case B_AUTOVAC_WORKER:
4046 0 : backendDesc = "autovacuum worker";
4047 0 : break;
4048 : case B_BACKEND:
4049 0 : backendDesc = "client backend";
4050 0 : break;
4051 : case B_BG_WORKER:
4052 0 : backendDesc = "background worker";
4053 0 : break;
4054 : case B_BG_WRITER:
4055 0 : backendDesc = "background writer";
4056 0 : break;
4057 : case B_CHECKPOINTER:
4058 0 : backendDesc = "checkpointer";
4059 0 : break;
4060 : case B_STARTUP:
4061 0 : backendDesc = "startup";
4062 0 : break;
4063 : case B_WAL_RECEIVER:
4064 0 : backendDesc = "walreceiver";
4065 0 : break;
4066 : case B_WAL_SENDER:
4067 0 : backendDesc = "walsender";
4068 0 : break;
4069 : case B_WAL_WRITER:
4070 0 : backendDesc = "walwriter";
4071 0 : break;
4072 : }
4073 :
4074 0 : return backendDesc;
4075 : }
4076 :
4077 : /* ------------------------------------------------------------
4078 : * Local support functions follow
4079 : * ------------------------------------------------------------
4080 : */
4081 :
4082 :
4083 : /* ----------
4084 : * pgstat_setheader() -
4085 : *
4086 : * Set common header fields in a statistics message
4087 : * ----------
4088 : */
4089 : static void
4090 4870 : pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
4091 : {
4092 4870 : hdr->m_type = mtype;
4093 4870 : }
4094 :
4095 :
4096 : /* ----------
4097 : * pgstat_send() -
4098 : *
4099 : * Send out one statistics message to the collector
4100 : * ----------
4101 : */
4102 : static void
4103 4870 : pgstat_send(void *msg, int len)
4104 : {
4105 : int rc;
4106 :
4107 4870 : if (pgStatSock == PGINVALID_SOCKET)
4108 4870 : return;
4109 :
4110 4870 : ((PgStat_MsgHdr *) msg)->m_size = len;
4111 :
4112 : /* We'll retry after EINTR, but ignore all other failures */
4113 : do
4114 : {
4115 4870 : rc = send(pgStatSock, msg, len, 0);
4116 4870 : } while (rc < 0 && errno == EINTR);
4117 :
4118 : #ifdef USE_ASSERT_CHECKING
4119 : /* In debug builds, log send failures ... */
4120 4870 : if (rc < 0)
4121 0 : elog(LOG, "could not send to statistics collector: %m");
4122 : #endif
4123 : }
4124 :
4125 : /* ----------
4126 : * pgstat_send_archiver() -
4127 : *
4128 : * Tell the collector about the WAL file that we successfully
4129 : * archived or failed to archive.
4130 : * ----------
4131 : */
4132 : void
4133 0 : pgstat_send_archiver(const char *xlog, bool failed)
4134 : {
4135 : PgStat_MsgArchiver msg;
4136 :
4137 : /*
4138 : * Prepare and send the message
4139 : */
4140 0 : pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ARCHIVER);
4141 0 : msg.m_failed = failed;
4142 0 : StrNCpy(msg.m_xlog, xlog, sizeof(msg.m_xlog));
4143 0 : msg.m_timestamp = GetCurrentTimestamp();
4144 0 : pgstat_send(&msg, sizeof(msg));
4145 0 : }
4146 :
4147 : /* ----------
4148 : * pgstat_send_bgwriter() -
4149 : *
4150 : * Send bgwriter statistics to the collector
4151 : * ----------
4152 : */
4153 : void
4154 396 : pgstat_send_bgwriter(void)
4155 : {
4156 : /* We assume this initializes to zeroes */
4157 : static const PgStat_MsgBgWriter all_zeroes;
4158 :
4159 : /*
4160 : * This function can be called even if nothing at all has happened. In
4161 : * this case, avoid sending a completely empty message to the stats
4162 : * collector.
4163 : */
4164 396 : if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
4165 545 : return;
4166 :
4167 : /*
4168 : * Prepare and send the message
4169 : */
4170 247 : pgstat_setheader(&BgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER);
4171 247 : pgstat_send(&BgWriterStats, sizeof(BgWriterStats));
4172 :
4173 : /*
4174 : * Clear out the statistics buffer, so it can be re-used.
4175 : */
4176 247 : MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
4177 : }
4178 :
4179 :
4180 : /* ----------
4181 : * PgstatCollectorMain() -
4182 : *
4183 : * Start up the statistics collector process. This is the body of the
4184 : * postmaster child process.
4185 : *
4186 : * The argc/argv parameters are valid only in EXEC_BACKEND case.
4187 : * ----------
4188 : */
4189 : NON_EXEC_STATIC void
4190 1 : PgstatCollectorMain(int argc, char *argv[])
4191 : {
4192 : int len;
4193 : PgStat_Msg msg;
4194 : int wr;
4195 :
4196 : /*
4197 : * Ignore all signals usually bound to some action in the postmaster,
4198 : * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
4199 : * support latch operations, because we only use a local latch.
4200 : */
4201 1 : pqsignal(SIGHUP, pgstat_sighup_handler);
4202 1 : pqsignal(SIGINT, SIG_IGN);
4203 1 : pqsignal(SIGTERM, SIG_IGN);
4204 1 : pqsignal(SIGQUIT, pgstat_exit);
4205 1 : pqsignal(SIGALRM, SIG_IGN);
4206 1 : pqsignal(SIGPIPE, SIG_IGN);
4207 1 : pqsignal(SIGUSR1, SIG_IGN);
4208 1 : pqsignal(SIGUSR2, SIG_IGN);
4209 1 : pqsignal(SIGCHLD, SIG_DFL);
4210 1 : pqsignal(SIGTTIN, SIG_DFL);
4211 1 : pqsignal(SIGTTOU, SIG_DFL);
4212 1 : pqsignal(SIGCONT, SIG_DFL);
4213 1 : pqsignal(SIGWINCH, SIG_DFL);
4214 1 : PG_SETMASK(&UnBlockSig);
4215 :
4216 : /*
4217 : * Identify myself via ps
4218 : */
4219 1 : init_ps_display("stats collector process", "", "", "");
4220 :
4221 : /*
4222 : * Read in existing stats files or initialize the stats to zero.
4223 : */
4224 1 : pgStatRunningInCollector = true;
4225 1 : pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
4226 :
4227 : /*
4228 : * Loop to process messages until we get SIGQUIT or detect ungraceful
4229 : * death of our parent postmaster.
4230 : *
4231 : * For performance reasons, we don't want to do ResetLatch/WaitLatch after
4232 : * every message; instead, do that only after a recv() fails to obtain a
4233 : * message. (This effectively means that if backends are sending us stuff
4234 : * like mad, we won't notice postmaster death until things slack off a
4235 : * bit; which seems fine.) To do that, we have an inner loop that
4236 : * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
4237 : * the inner loop, which means that such interrupts will get serviced but
4238 : * the latch won't get cleared until next time there is a break in the
4239 : * action.
4240 : */
4241 : for (;;)
4242 : {
4243 : /* Clear any already-pending wakeups */
4244 2191 : ResetLatch(MyLatch);
4245 :
4246 : /*
4247 : * Quit if we get SIGQUIT from the postmaster.
4248 : */
4249 2191 : if (need_exit)
4250 1 : break;
4251 :
4252 : /*
4253 : * Inner loop iterates as long as we keep getting messages, or until
4254 : * need_exit becomes set.
4255 : */
4256 9184 : while (!need_exit)
4257 : {
4258 : /*
4259 : * Reload configuration if we got SIGHUP from the postmaster.
4260 : */
4261 6994 : if (got_SIGHUP)
4262 : {
4263 0 : got_SIGHUP = false;
4264 0 : ProcessConfigFile(PGC_SIGHUP);
4265 : }
4266 :
4267 : /*
4268 : * Write the stats file(s) if a new request has arrived that is
4269 : * not satisfied by existing file(s).
4270 : */
4271 6994 : if (pgstat_write_statsfile_needed())
4272 70 : pgstat_write_statsfiles(false, false);
4273 :
4274 : /*
4275 : * Try to receive and process a message. This will not block,
4276 : * since the socket is set to non-blocking mode.
4277 : *
4278 : * XXX On Windows, we have to force pgwin32_recv to cooperate,
4279 : * despite the previous use of pg_set_noblock() on the socket.
4280 : * This is extremely broken and should be fixed someday.
4281 : */
4282 : #ifdef WIN32
4283 : pgwin32_noblock = 1;
4284 : #endif
4285 :
4286 6994 : len = recv(pgStatSock, (char *) &msg,
4287 : sizeof(PgStat_Msg), 0);
4288 :
4289 : #ifdef WIN32
4290 : pgwin32_noblock = 0;
4291 : #endif
4292 :
4293 6994 : if (len < 0)
4294 : {
4295 2190 : if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
4296 : break; /* out of inner loop */
4297 0 : ereport(ERROR,
4298 : (errcode_for_socket_access(),
4299 : errmsg("could not read statistics message: %m")));
4300 : }
4301 :
4302 : /*
4303 : * We ignore messages that are smaller than our common header
4304 : */
4305 4804 : if (len < sizeof(PgStat_MsgHdr))
4306 0 : continue;
4307 :
4308 : /*
4309 : * The received length must match the length in the header
4310 : */
4311 4804 : if (msg.msg_hdr.m_size != len)
4312 0 : continue;
4313 :
4314 : /*
4315 : * O.K. - we accept this message. Process it.
4316 : */
4317 4804 : switch (msg.msg_hdr.m_type)
4318 : {
4319 : case PGSTAT_MTYPE_DUMMY:
4320 0 : break;
4321 :
4322 : case PGSTAT_MTYPE_INQUIRY:
4323 82 : pgstat_recv_inquiry((PgStat_MsgInquiry *) &msg, len);
4324 82 : break;
4325 :
4326 : case PGSTAT_MTYPE_TABSTAT:
4327 3984 : pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, len);
4328 3984 : break;
4329 :
4330 : case PGSTAT_MTYPE_TABPURGE:
4331 47 : pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, len);
4332 47 : break;
4333 :
4334 : case PGSTAT_MTYPE_DROPDB:
4335 0 : pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, len);
4336 0 : break;
4337 :
4338 : case PGSTAT_MTYPE_RESETCOUNTER:
4339 0 : pgstat_recv_resetcounter((PgStat_MsgResetcounter *) &msg,
4340 : len);
4341 0 : break;
4342 :
4343 : case PGSTAT_MTYPE_RESETSHAREDCOUNTER:
4344 0 : pgstat_recv_resetsharedcounter(
4345 : (PgStat_MsgResetsharedcounter *) &msg,
4346 : len);
4347 0 : break;
4348 :
4349 : case PGSTAT_MTYPE_RESETSINGLECOUNTER:
4350 0 : pgstat_recv_resetsinglecounter(
4351 : (PgStat_MsgResetsinglecounter *) &msg,
4352 : len);
4353 0 : break;
4354 :
4355 : case PGSTAT_MTYPE_AUTOVAC_START:
4356 3 : pgstat_recv_autovac((PgStat_MsgAutovacStart *) &msg, len);
4357 3 : break;
4358 :
4359 : case PGSTAT_MTYPE_VACUUM:
4360 299 : pgstat_recv_vacuum((PgStat_MsgVacuum *) &msg, len);
4361 299 : break;
4362 :
4363 : case PGSTAT_MTYPE_ANALYZE:
4364 136 : pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, len);
4365 136 : break;
4366 :
4367 : case PGSTAT_MTYPE_ARCHIVER:
4368 0 : pgstat_recv_archiver((PgStat_MsgArchiver *) &msg, len);
4369 0 : break;
4370 :
4371 : case PGSTAT_MTYPE_BGWRITER:
4372 243 : pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
4373 243 : break;
4374 :
4375 : case PGSTAT_MTYPE_FUNCSTAT:
4376 0 : pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
4377 0 : break;
4378 :
4379 : case PGSTAT_MTYPE_FUNCPURGE:
4380 0 : pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len);
4381 0 : break;
4382 :
4383 : case PGSTAT_MTYPE_RECOVERYCONFLICT:
4384 0 : pgstat_recv_recoveryconflict((PgStat_MsgRecoveryConflict *) &msg, len);
4385 0 : break;
4386 :
4387 : case PGSTAT_MTYPE_DEADLOCK:
4388 0 : pgstat_recv_deadlock((PgStat_MsgDeadlock *) &msg, len);
4389 0 : break;
4390 :
4391 : case PGSTAT_MTYPE_TEMPFILE:
4392 10 : pgstat_recv_tempfile((PgStat_MsgTempFile *) &msg, len);
4393 10 : break;
4394 :
4395 : default:
4396 0 : break;
4397 : }
4398 : } /* end of inner message-processing loop */
4399 :
4400 : /* Sleep until there's something to do */
4401 : #ifndef WIN32
4402 2190 : wr = WaitLatchOrSocket(MyLatch,
4403 : WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
4404 : pgStatSock, -1L,
4405 : WAIT_EVENT_PGSTAT_MAIN);
4406 : #else
4407 :
4408 : /*
4409 : * Windows, at least in its Windows Server 2003 R2 incarnation,
4410 : * sometimes loses FD_READ events. Waking up and retrying the recv()
4411 : * fixes that, so don't sleep indefinitely. This is a crock of the
4412 : * first water, but until somebody wants to debug exactly what's
4413 : * happening there, this is the best we can do. The two-second
4414 : * timeout matches our pre-9.2 behavior, and needs to be short enough
4415 : * to not provoke "using stale statistics" complaints from
4416 : * backend_read_statsfile.
4417 : */
4418 : wr = WaitLatchOrSocket(MyLatch,
4419 : WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE | WL_TIMEOUT,
4420 : pgStatSock,
4421 : 2 * 1000L /* msec */ ,
4422 : WAIT_EVENT_PGSTAT_MAIN);
4423 : #endif
4424 :
4425 : /*
4426 : * Emergency bailout if postmaster has died. This is to avoid the
4427 : * necessity for manual cleanup of all postmaster children.
4428 : */
4429 2190 : if (wr & WL_POSTMASTER_DEATH)
4430 0 : break;
4431 2190 : } /* end of outer loop */
4432 :
4433 : /*
4434 : * Save the final stats to reuse at next startup.
4435 : */
4436 1 : pgstat_write_statsfiles(true, true);
4437 :
4438 1 : exit(0);
4439 : }
4440 :
4441 :
4442 : /* SIGQUIT signal handler for collector process */
4443 : static void
4444 1 : pgstat_exit(SIGNAL_ARGS)
4445 : {
4446 1 : int save_errno = errno;
4447 :
4448 1 : need_exit = true;
4449 1 : SetLatch(MyLatch);
4450 :
4451 1 : errno = save_errno;
4452 1 : }
4453 :
4454 : /* SIGHUP handler for collector process */
4455 : static void
4456 0 : pgstat_sighup_handler(SIGNAL_ARGS)
4457 : {
4458 0 : int save_errno = errno;
4459 :
4460 0 : got_SIGHUP = true;
4461 0 : SetLatch(MyLatch);
4462 :
4463 0 : errno = save_errno;
4464 0 : }
4465 :
4466 : /*
4467 : * Subroutine to clear stats in a database entry
4468 : *
4469 : * Tables and functions hashes are initialized to empty.
4470 : */
4471 : static void
4472 3 : reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
4473 : {
4474 : HASHCTL hash_ctl;
4475 :
4476 3 : dbentry->n_xact_commit = 0;
4477 3 : dbentry->n_xact_rollback = 0;
4478 3 : dbentry->n_blocks_fetched = 0;
4479 3 : dbentry->n_blocks_hit = 0;
4480 3 : dbentry->n_tuples_returned = 0;
4481 3 : dbentry->n_tuples_fetched = 0;
4482 3 : dbentry->n_tuples_inserted = 0;
4483 3 : dbentry->n_tuples_updated = 0;
4484 3 : dbentry->n_tuples_deleted = 0;
4485 3 : dbentry->last_autovac_time = 0;
4486 3 : dbentry->n_conflict_tablespace = 0;
4487 3 : dbentry->n_conflict_lock = 0;
4488 3 : dbentry->n_conflict_snapshot = 0;
4489 3 : dbentry->n_conflict_bufferpin = 0;
4490 3 : dbentry->n_conflict_startup_deadlock = 0;
4491 3 : dbentry->n_temp_files = 0;
4492 3 : dbentry->n_temp_bytes = 0;
4493 3 : dbentry->n_deadlocks = 0;
4494 3 : dbentry->n_block_read_time = 0;
4495 3 : dbentry->n_block_write_time = 0;
4496 :
4497 3 : dbentry->stat_reset_timestamp = GetCurrentTimestamp();
4498 3 : dbentry->stats_timestamp = 0;
4499 :
4500 3 : memset(&hash_ctl, 0, sizeof(hash_ctl));
4501 3 : hash_ctl.keysize = sizeof(Oid);
4502 3 : hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
4503 3 : dbentry->tables = hash_create("Per-database table",
4504 : PGSTAT_TAB_HASH_SIZE,
4505 : &hash_ctl,
4506 : HASH_ELEM | HASH_BLOBS);
4507 :
4508 3 : hash_ctl.keysize = sizeof(Oid);
4509 3 : hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
4510 3 : dbentry->functions = hash_create("Per-database function",
4511 : PGSTAT_FUNCTION_HASH_SIZE,
4512 : &hash_ctl,
4513 : HASH_ELEM | HASH_BLOBS);
4514 3 : }
4515 :
4516 : /*
4517 : * Lookup the hash table entry for the specified database. If no hash
4518 : * table entry exists, initialize it, if the create parameter is true.
4519 : * Else, return NULL.
4520 : */
4521 : static PgStat_StatDBEntry *
4522 4561 : pgstat_get_db_entry(Oid databaseid, bool create)
4523 : {
4524 : PgStat_StatDBEntry *result;
4525 : bool found;
4526 4561 : HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4527 :
4528 : /* Lookup or create the hash table entry for this database */
4529 4561 : result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
4530 : &databaseid,
4531 : action, &found);
4532 :
4533 4561 : if (!create && !found)
4534 1 : return NULL;
4535 :
4536 : /*
4537 : * If not found, initialize the new one. This creates empty hash tables
4538 : * for tables and functions, too.
4539 : */
4540 4560 : if (!found)
4541 3 : reset_dbentry_counters(result);
4542 :
4543 4560 : return result;
4544 : }
4545 :
4546 :
4547 : /*
4548 : * Lookup the hash table entry for the specified table. If no hash
4549 : * table entry exists, initialize it, if the create parameter is true.
4550 : * Else, return NULL.
4551 : */
4552 : static PgStat_StatTabEntry *
4553 435 : pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
4554 : {
4555 : PgStat_StatTabEntry *result;
4556 : bool found;
4557 435 : HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
4558 :
4559 : /* Lookup or create the hash table entry for this table */
4560 435 : result = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
4561 : &tableoid,
4562 : action, &found);
4563 :
4564 435 : if (!create && !found)
4565 0 : return NULL;
4566 :
4567 : /* If not found, initialize the new one. */
4568 435 : if (!found)
4569 : {
4570 97 : result->numscans = 0;
4571 97 : result->tuples_returned = 0;
4572 97 : result->tuples_fetched = 0;
4573 97 : result->tuples_inserted = 0;
4574 97 : result->tuples_updated = 0;
4575 97 : result->tuples_deleted = 0;
4576 97 : result->tuples_hot_updated = 0;
4577 97 : result->n_live_tuples = 0;
4578 97 : result->n_dead_tuples = 0;
4579 97 : result->changes_since_analyze = 0;
4580 97 : result->blocks_fetched = 0;
4581 97 : result->blocks_hit = 0;
4582 97 : result->vacuum_timestamp = 0;
4583 97 : result->vacuum_count = 0;
4584 97 : result->autovac_vacuum_timestamp = 0;
4585 97 : result->autovac_vacuum_count = 0;
4586 97 : result->analyze_timestamp = 0;
4587 97 : result->analyze_count = 0;
4588 97 : result->autovac_analyze_timestamp = 0;
4589 97 : result->autovac_analyze_count = 0;
4590 : }
4591 :
4592 435 : return result;
4593 : }
4594 :
4595 :
4596 : /* ----------
4597 : * pgstat_write_statsfiles() -
4598 : * Write the global statistics file, as well as requested DB files.
4599 : *
4600 : * 'permanent' specifies writing to the permanent files not temporary ones.
4601 : * When true (happens only when the collector is shutting down), also remove
4602 : * the temporary files so that backends starting up under a new postmaster
4603 : * can't read old data before the new collector is ready.
4604 : *
4605 : * When 'allDbs' is false, only the requested databases (listed in
4606 : * pending_write_requests) will be written; otherwise, all databases
4607 : * will be written.
4608 : * ----------
4609 : */
4610 : static void
4611 71 : pgstat_write_statsfiles(bool permanent, bool allDbs)
4612 : {
4613 : HASH_SEQ_STATUS hstat;
4614 : PgStat_StatDBEntry *dbentry;
4615 : FILE *fpout;
4616 : int32 format_id;
4617 71 : const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
4618 71 : const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4619 : int rc;
4620 :
4621 71 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
4622 :
4623 : /*
4624 : * Open the statistics temp file to write out the current values.
4625 : */
4626 71 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
4627 71 : if (fpout == NULL)
4628 : {
4629 0 : ereport(LOG,
4630 : (errcode_for_file_access(),
4631 : errmsg("could not open temporary statistics file \"%s\": %m",
4632 : tmpfile)));
4633 71 : return;
4634 : }
4635 :
4636 : /*
4637 : * Set the timestamp of the stats file.
4638 : */
4639 71 : globalStats.stats_timestamp = GetCurrentTimestamp();
4640 :
4641 : /*
4642 : * Write the file header --- currently just a format ID.
4643 : */
4644 71 : format_id = PGSTAT_FILE_FORMAT_ID;
4645 71 : rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4646 : (void) rc; /* we'll check for error with ferror */
4647 :
4648 : /*
4649 : * Write global stats struct
4650 : */
4651 71 : rc = fwrite(&globalStats, sizeof(globalStats), 1, fpout);
4652 : (void) rc; /* we'll check for error with ferror */
4653 :
4654 : /*
4655 : * Write archiver stats struct
4656 : */
4657 71 : rc = fwrite(&archiverStats, sizeof(archiverStats), 1, fpout);
4658 : (void) rc; /* we'll check for error with ferror */
4659 :
4660 : /*
4661 : * Walk through the database table.
4662 : */
4663 71 : hash_seq_init(&hstat, pgStatDBHash);
4664 352 : while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
4665 : {
4666 : /*
4667 : * Write out the table and function stats for this DB into the
4668 : * appropriate per-DB stat file, if required.
4669 : */
4670 210 : if (allDbs || pgstat_db_requested(dbentry->databaseid))
4671 : {
4672 : /* Make DB's timestamp consistent with the global stats */
4673 138 : dbentry->stats_timestamp = globalStats.stats_timestamp;
4674 :
4675 138 : pgstat_write_db_statsfile(dbentry, permanent);
4676 : }
4677 :
4678 : /*
4679 : * Write out the DB entry. We don't write the tables or functions
4680 : * pointers, since they're of no use to any other process.
4681 : */
4682 210 : fputc('D', fpout);
4683 210 : rc = fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
4684 : (void) rc; /* we'll check for error with ferror */
4685 : }
4686 :
4687 : /*
4688 : * No more output to be done. Close the temp file and replace the old
4689 : * pgstat.stat with it. The ferror() check replaces testing for error
4690 : * after each individual fputc or fwrite above.
4691 : */
4692 71 : fputc('E', fpout);
4693 :
4694 71 : if (ferror(fpout))
4695 : {
4696 0 : ereport(LOG,
4697 : (errcode_for_file_access(),
4698 : errmsg("could not write temporary statistics file \"%s\": %m",
4699 : tmpfile)));
4700 0 : FreeFile(fpout);
4701 0 : unlink(tmpfile);
4702 : }
4703 71 : else if (FreeFile(fpout) < 0)
4704 : {
4705 0 : ereport(LOG,
4706 : (errcode_for_file_access(),
4707 : errmsg("could not close temporary statistics file \"%s\": %m",
4708 : tmpfile)));
4709 0 : unlink(tmpfile);
4710 : }
4711 71 : else if (rename(tmpfile, statfile) < 0)
4712 : {
4713 0 : ereport(LOG,
4714 : (errcode_for_file_access(),
4715 : errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4716 : tmpfile, statfile)));
4717 0 : unlink(tmpfile);
4718 : }
4719 :
4720 71 : if (permanent)
4721 1 : unlink(pgstat_stat_filename);
4722 :
4723 : /*
4724 : * Now throw away the list of requests. Note that requests sent after we
4725 : * started the write are still waiting on the network socket.
4726 : */
4727 71 : list_free(pending_write_requests);
4728 71 : pending_write_requests = NIL;
4729 : }
4730 :
4731 : /*
4732 : * return the filename for a DB stat file; filename is the output buffer,
4733 : * of length len.
4734 : */
4735 : static void
4736 485 : get_dbstat_filename(bool permanent, bool tempname, Oid databaseid,
4737 : char *filename, int len)
4738 : {
4739 : int printed;
4740 :
4741 : /* NB -- pgstat_reset_remove_files knows about the pattern this uses */
4742 485 : printed = snprintf(filename, len, "%s/db_%u.%s",
4743 : permanent ? PGSTAT_STAT_PERMANENT_DIRECTORY :
4744 : pgstat_stat_directory,
4745 : databaseid,
4746 : tempname ? "tmp" : "stat");
4747 485 : if (printed > len)
4748 0 : elog(ERROR, "overlength pgstat path");
4749 485 : }
4750 :
4751 : /* ----------
4752 : * pgstat_write_db_statsfile() -
4753 : * Write the stat file for a single database.
4754 : *
4755 : * If writing to the permanent file (happens when the collector is
4756 : * shutting down only), remove the temporary file so that backends
4757 : * starting up under a new postmaster can't read the old data before
4758 : * the new collector is ready.
4759 : * ----------
4760 : */
4761 : static void
4762 138 : pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent)
4763 : {
4764 : HASH_SEQ_STATUS tstat;
4765 : HASH_SEQ_STATUS fstat;
4766 : PgStat_StatTabEntry *tabentry;
4767 : PgStat_StatFuncEntry *funcentry;
4768 : FILE *fpout;
4769 : int32 format_id;
4770 138 : Oid dbid = dbentry->databaseid;
4771 : int rc;
4772 : char tmpfile[MAXPGPATH];
4773 : char statfile[MAXPGPATH];
4774 :
4775 138 : get_dbstat_filename(permanent, true, dbid, tmpfile, MAXPGPATH);
4776 138 : get_dbstat_filename(permanent, false, dbid, statfile, MAXPGPATH);
4777 :
4778 138 : elog(DEBUG2, "writing stats file \"%s\"", statfile);
4779 :
4780 : /*
4781 : * Open the statistics temp file to write out the current values.
4782 : */
4783 138 : fpout = AllocateFile(tmpfile, PG_BINARY_W);
4784 138 : if (fpout == NULL)
4785 : {
4786 0 : ereport(LOG,
4787 : (errcode_for_file_access(),
4788 : errmsg("could not open temporary statistics file \"%s\": %m",
4789 : tmpfile)));
4790 138 : return;
4791 : }
4792 :
4793 : /*
4794 : * Write the file header --- currently just a format ID.
4795 : */
4796 138 : format_id = PGSTAT_FILE_FORMAT_ID;
4797 138 : rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
4798 : (void) rc; /* we'll check for error with ferror */
4799 :
4800 : /*
4801 : * Walk through the database's access stats per table.
4802 : */
4803 138 : hash_seq_init(&tstat, dbentry->tables);
4804 34345 : while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
4805 : {
4806 34069 : fputc('T', fpout);
4807 34069 : rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
4808 : (void) rc; /* we'll check for error with ferror */
4809 : }
4810 :
4811 : /*
4812 : * Walk through the database's function stats table.
4813 : */
4814 138 : hash_seq_init(&fstat, dbentry->functions);
4815 276 : while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
4816 : {
4817 0 : fputc('F', fpout);
4818 0 : rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
4819 : (void) rc; /* we'll check for error with ferror */
4820 : }
4821 :
4822 : /*
4823 : * No more output to be done. Close the temp file and replace the old
4824 : * pgstat.stat with it. The ferror() check replaces testing for error
4825 : * after each individual fputc or fwrite above.
4826 : */
4827 138 : fputc('E', fpout);
4828 :
4829 138 : if (ferror(fpout))
4830 : {
4831 0 : ereport(LOG,
4832 : (errcode_for_file_access(),
4833 : errmsg("could not write temporary statistics file \"%s\": %m",
4834 : tmpfile)));
4835 0 : FreeFile(fpout);
4836 0 : unlink(tmpfile);
4837 : }
4838 138 : else if (FreeFile(fpout) < 0)
4839 : {
4840 0 : ereport(LOG,
4841 : (errcode_for_file_access(),
4842 : errmsg("could not close temporary statistics file \"%s\": %m",
4843 : tmpfile)));
4844 0 : unlink(tmpfile);
4845 : }
4846 138 : else if (rename(tmpfile, statfile) < 0)
4847 : {
4848 0 : ereport(LOG,
4849 : (errcode_for_file_access(),
4850 : errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
4851 : tmpfile, statfile)));
4852 0 : unlink(tmpfile);
4853 : }
4854 :
4855 138 : if (permanent)
4856 : {
4857 3 : get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
4858 :
4859 3 : elog(DEBUG2, "removing temporary stats file \"%s\"", statfile);
4860 3 : unlink(statfile);
4861 : }
4862 : }
4863 :
4864 : /* ----------
4865 : * pgstat_read_statsfiles() -
4866 : *
4867 : * Reads in some existing statistics collector files and returns the
4868 : * databases hash table that is the top level of the data.
4869 : *
4870 : * If 'onlydb' is not InvalidOid, it means we only want data for that DB
4871 : * plus the shared catalogs ("DB 0"). We'll still populate the DB hash
4872 : * table for all databases, but we don't bother even creating table/function
4873 : * hash tables for other databases.
4874 : *
4875 : * 'permanent' specifies reading from the permanent files not temporary ones.
4876 : * When true (happens only when the collector is starting up), remove the
4877 : * files after reading; the in-memory status is now authoritative, and the
4878 : * files would be out of date in case somebody else reads them.
4879 : *
4880 : * If a 'deep' read is requested, table/function stats are read, otherwise
4881 : * the table/function hash tables remain empty.
4882 : * ----------
4883 : */
4884 : static HTAB *
4885 110 : pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
4886 : {
4887 : PgStat_StatDBEntry *dbentry;
4888 : PgStat_StatDBEntry dbbuf;
4889 : HASHCTL hash_ctl;
4890 : HTAB *dbhash;
4891 : FILE *fpin;
4892 : int32 format_id;
4893 : bool found;
4894 110 : const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
4895 :
4896 : /*
4897 : * The tables will live in pgStatLocalContext.
4898 : */
4899 110 : pgstat_setup_memcxt();
4900 :
4901 : /*
4902 : * Create the DB hashtable
4903 : */
4904 110 : memset(&hash_ctl, 0, sizeof(hash_ctl));
4905 110 : hash_ctl.keysize = sizeof(Oid);
4906 110 : hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
4907 110 : hash_ctl.hcxt = pgStatLocalContext;
4908 110 : dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
4909 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
4910 :
4911 : /*
4912 : * Clear out global and archiver statistics so they start from zero in
4913 : * case we can't load an existing statsfile.
4914 : */
4915 110 : memset(&globalStats, 0, sizeof(globalStats));
4916 110 : memset(&archiverStats, 0, sizeof(archiverStats));
4917 :
4918 : /*
4919 : * Set the current timestamp (will be kept only in case we can't load an
4920 : * existing statsfile).
4921 : */
4922 110 : globalStats.stat_reset_timestamp = GetCurrentTimestamp();
4923 110 : archiverStats.stat_reset_timestamp = globalStats.stat_reset_timestamp;
4924 :
4925 : /*
4926 : * Try to open the stats file. If it doesn't exist, the backends simply
4927 : * return zero for anything and the collector simply starts from scratch
4928 : * with empty counters.
4929 : *
4930 : * ENOENT is a possibility if the stats collector is not running or has
4931 : * not yet written the stats file the first time. Any other failure
4932 : * condition is suspicious.
4933 : */
4934 110 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
4935 : {
4936 1 : if (errno != ENOENT)
4937 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
4938 : (errcode_for_file_access(),
4939 : errmsg("could not open statistics file \"%s\": %m",
4940 : statfile)));
4941 1 : return dbhash;
4942 : }
4943 :
4944 : /*
4945 : * Verify it's of the expected format.
4946 : */
4947 218 : if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
4948 109 : format_id != PGSTAT_FILE_FORMAT_ID)
4949 : {
4950 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
4951 : (errmsg("corrupted statistics file \"%s\"", statfile)));
4952 0 : goto done;
4953 : }
4954 :
4955 : /*
4956 : * Read global stats struct
4957 : */
4958 109 : if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats))
4959 : {
4960 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
4961 : (errmsg("corrupted statistics file \"%s\"", statfile)));
4962 0 : memset(&globalStats, 0, sizeof(globalStats));
4963 0 : goto done;
4964 : }
4965 :
4966 : /*
4967 : * In the collector, disregard the timestamp we read from the permanent
4968 : * stats file; we should be willing to write a temp stats file immediately
4969 : * upon the first request from any backend. This only matters if the old
4970 : * file's timestamp is less than PGSTAT_STAT_INTERVAL ago, but that's not
4971 : * an unusual scenario.
4972 : */
4973 109 : if (pgStatRunningInCollector)
4974 0 : globalStats.stats_timestamp = 0;
4975 :
4976 : /*
4977 : * Read archiver stats struct
4978 : */
4979 109 : if (fread(&archiverStats, 1, sizeof(archiverStats), fpin) != sizeof(archiverStats))
4980 : {
4981 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
4982 : (errmsg("corrupted statistics file \"%s\"", statfile)));
4983 0 : memset(&archiverStats, 0, sizeof(archiverStats));
4984 0 : goto done;
4985 : }
4986 :
4987 : /*
4988 : * We found an existing collector stats file. Read it and put all the
4989 : * hashtable entries into place.
4990 : */
4991 : for (;;)
4992 : {
4993 430 : switch (fgetc(fpin))
4994 : {
4995 : /*
4996 : * 'D' A PgStat_StatDBEntry struct describing a database
4997 : * follows.
4998 : */
4999 : case 'D':
5000 321 : if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
5001 : fpin) != offsetof(PgStat_StatDBEntry, tables))
5002 : {
5003 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5004 : (errmsg("corrupted statistics file \"%s\"",
5005 : statfile)));
5006 0 : goto done;
5007 : }
5008 :
5009 : /*
5010 : * Add to the DB hash
5011 : */
5012 321 : dbentry = (PgStat_StatDBEntry *) hash_search(dbhash,
5013 : (void *) &dbbuf.databaseid,
5014 : HASH_ENTER,
5015 : &found);
5016 321 : if (found)
5017 : {
5018 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5019 : (errmsg("corrupted statistics file \"%s\"",
5020 : statfile)));
5021 0 : goto done;
5022 : }
5023 :
5024 321 : memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
5025 321 : dbentry->tables = NULL;
5026 321 : dbentry->functions = NULL;
5027 :
5028 : /*
5029 : * In the collector, disregard the timestamp we read from the
5030 : * permanent stats file; we should be willing to write a temp
5031 : * stats file immediately upon the first request from any
5032 : * backend.
5033 : */
5034 321 : if (pgStatRunningInCollector)
5035 0 : dbentry->stats_timestamp = 0;
5036 :
5037 : /*
5038 : * Don't create tables/functions hashtables for uninteresting
5039 : * databases.
5040 : */
5041 321 : if (onlydb != InvalidOid)
5042 : {
5043 515 : if (dbbuf.databaseid != onlydb &&
5044 206 : dbbuf.databaseid != InvalidOid)
5045 103 : break;
5046 : }
5047 :
5048 218 : memset(&hash_ctl, 0, sizeof(hash_ctl));
5049 218 : hash_ctl.keysize = sizeof(Oid);
5050 218 : hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
5051 218 : hash_ctl.hcxt = pgStatLocalContext;
5052 218 : dbentry->tables = hash_create("Per-database table",
5053 : PGSTAT_TAB_HASH_SIZE,
5054 : &hash_ctl,
5055 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
5056 :
5057 218 : hash_ctl.keysize = sizeof(Oid);
5058 218 : hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
5059 218 : hash_ctl.hcxt = pgStatLocalContext;
5060 218 : dbentry->functions = hash_create("Per-database function",
5061 : PGSTAT_FUNCTION_HASH_SIZE,
5062 : &hash_ctl,
5063 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
5064 :
5065 : /*
5066 : * If requested, read the data from the database-specific
5067 : * file. Otherwise we just leave the hashtables empty.
5068 : */
5069 218 : if (deep)
5070 206 : pgstat_read_db_statsfile(dbentry->databaseid,
5071 : dbentry->tables,
5072 : dbentry->functions,
5073 : permanent);
5074 :
5075 218 : break;
5076 :
5077 : case 'E':
5078 109 : goto done;
5079 :
5080 : default:
5081 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5082 : (errmsg("corrupted statistics file \"%s\"",
5083 : statfile)));
5084 0 : goto done;
5085 : }
5086 321 : }
5087 :
5088 : done:
5089 109 : FreeFile(fpin);
5090 :
5091 : /* If requested to read the permanent file, also get rid of it. */
5092 109 : if (permanent)
5093 : {
5094 0 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
5095 0 : unlink(statfile);
5096 : }
5097 :
5098 109 : return dbhash;
5099 : }
5100 :
5101 :
5102 : /* ----------
5103 : * pgstat_read_db_statsfile() -
5104 : *
5105 : * Reads in the existing statistics collector file for the given database,
5106 : * filling the passed-in tables and functions hash tables.
5107 : *
5108 : * As in pgstat_read_statsfiles, if the permanent file is requested, it is
5109 : * removed after reading.
5110 : *
5111 : * Note: this code has the ability to skip storing per-table or per-function
5112 : * data, if NULL is passed for the corresponding hashtable. That's not used
5113 : * at the moment though.
5114 : * ----------
5115 : */
5116 : static void
5117 206 : pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash,
5118 : bool permanent)
5119 : {
5120 : PgStat_StatTabEntry *tabentry;
5121 : PgStat_StatTabEntry tabbuf;
5122 : PgStat_StatFuncEntry funcbuf;
5123 : PgStat_StatFuncEntry *funcentry;
5124 : FILE *fpin;
5125 : int32 format_id;
5126 : bool found;
5127 : char statfile[MAXPGPATH];
5128 :
5129 206 : get_dbstat_filename(permanent, false, databaseid, statfile, MAXPGPATH);
5130 :
5131 : /*
5132 : * Try to open the stats file. If it doesn't exist, the backends simply
5133 : * return zero for anything and the collector simply starts from scratch
5134 : * with empty counters.
5135 : *
5136 : * ENOENT is a possibility if the stats collector is not running or has
5137 : * not yet written the stats file the first time. Any other failure
5138 : * condition is suspicious.
5139 : */
5140 206 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
5141 : {
5142 0 : if (errno != ENOENT)
5143 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5144 : (errcode_for_file_access(),
5145 : errmsg("could not open statistics file \"%s\": %m",
5146 : statfile)));
5147 206 : return;
5148 : }
5149 :
5150 : /*
5151 : * Verify it's of the expected format.
5152 : */
5153 412 : if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
5154 206 : format_id != PGSTAT_FILE_FORMAT_ID)
5155 : {
5156 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5157 : (errmsg("corrupted statistics file \"%s\"", statfile)));
5158 0 : goto done;
5159 : }
5160 :
5161 : /*
5162 : * We found an existing collector stats file. Read it and put all the
5163 : * hashtable entries into place.
5164 : */
5165 : for (;;)
5166 : {
5167 51557 : switch (fgetc(fpin))
5168 : {
5169 : /*
5170 : * 'T' A PgStat_StatTabEntry follows.
5171 : */
5172 : case 'T':
5173 51351 : if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry),
5174 : fpin) != sizeof(PgStat_StatTabEntry))
5175 : {
5176 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5177 : (errmsg("corrupted statistics file \"%s\"",
5178 : statfile)));
5179 0 : goto done;
5180 : }
5181 :
5182 : /*
5183 : * Skip if table data not wanted.
5184 : */
5185 51351 : if (tabhash == NULL)
5186 0 : break;
5187 :
5188 51351 : tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
5189 : (void *) &tabbuf.tableid,
5190 : HASH_ENTER, &found);
5191 :
5192 51351 : if (found)
5193 : {
5194 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5195 : (errmsg("corrupted statistics file \"%s\"",
5196 : statfile)));
5197 0 : goto done;
5198 : }
5199 :
5200 51351 : memcpy(tabentry, &tabbuf, sizeof(tabbuf));
5201 51351 : break;
5202 :
5203 : /*
5204 : * 'F' A PgStat_StatFuncEntry follows.
5205 : */
5206 : case 'F':
5207 0 : if (fread(&funcbuf, 1, sizeof(PgStat_StatFuncEntry),
5208 : fpin) != sizeof(PgStat_StatFuncEntry))
5209 : {
5210 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5211 : (errmsg("corrupted statistics file \"%s\"",
5212 : statfile)));
5213 0 : goto done;
5214 : }
5215 :
5216 : /*
5217 : * Skip if function data not wanted.
5218 : */
5219 0 : if (funchash == NULL)
5220 0 : break;
5221 :
5222 0 : funcentry = (PgStat_StatFuncEntry *) hash_search(funchash,
5223 : (void *) &funcbuf.functionid,
5224 : HASH_ENTER, &found);
5225 :
5226 0 : if (found)
5227 : {
5228 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5229 : (errmsg("corrupted statistics file \"%s\"",
5230 : statfile)));
5231 0 : goto done;
5232 : }
5233 :
5234 0 : memcpy(funcentry, &funcbuf, sizeof(funcbuf));
5235 0 : break;
5236 :
5237 : /*
5238 : * 'E' The EOF marker of a complete stats file.
5239 : */
5240 : case 'E':
5241 206 : goto done;
5242 :
5243 : default:
5244 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5245 : (errmsg("corrupted statistics file \"%s\"",
5246 : statfile)));
5247 0 : goto done;
5248 : }
5249 51351 : }
5250 :
5251 : done:
5252 206 : FreeFile(fpin);
5253 :
5254 206 : if (permanent)
5255 : {
5256 0 : elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
5257 0 : unlink(statfile);
5258 : }
5259 : }
5260 :
5261 : /* ----------
5262 : * pgstat_read_db_statsfile_timestamp() -
5263 : *
5264 : * Attempt to determine the timestamp of the last db statfile write.
5265 : * Returns TRUE if successful; the timestamp is stored in *ts.
5266 : *
5267 : * This needs to be careful about handling databases for which no stats file
5268 : * exists, such as databases without a stat entry or those not yet written:
5269 : *
5270 : * - if there's a database entry in the global file, return the corresponding
5271 : * stats_timestamp value.
5272 : *
5273 : * - if there's no db stat entry (e.g. for a new or inactive database),
5274 : * there's no stats_timestamp value, but also nothing to write so we return
5275 : * the timestamp of the global statfile.
5276 : * ----------
5277 : */
5278 : static bool
5279 1262 : pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent,
5280 : TimestampTz *ts)
5281 : {
5282 : PgStat_StatDBEntry dbentry;
5283 : PgStat_GlobalStats myGlobalStats;
5284 : PgStat_ArchiverStats myArchiverStats;
5285 : FILE *fpin;
5286 : int32 format_id;
5287 1262 : const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
5288 :
5289 : /*
5290 : * Try to open the stats file. As above, anything but ENOENT is worthy of
5291 : * complaining about.
5292 : */
5293 1262 : if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
5294 : {
5295 4 : if (errno != ENOENT)
5296 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5297 : (errcode_for_file_access(),
5298 : errmsg("could not open statistics file \"%s\": %m",
5299 : statfile)));
5300 4 : return false;
5301 : }
5302 :
5303 : /*
5304 : * Verify it's of the expected format.
5305 : */
5306 2516 : if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) ||
5307 1258 : format_id != PGSTAT_FILE_FORMAT_ID)
5308 : {
5309 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5310 : (errmsg("corrupted statistics file \"%s\"", statfile)));
5311 0 : FreeFile(fpin);
5312 0 : return false;
5313 : }
5314 :
5315 : /*
5316 : * Read global stats struct
5317 : */
5318 1258 : if (fread(&myGlobalStats, 1, sizeof(myGlobalStats),
5319 : fpin) != sizeof(myGlobalStats))
5320 : {
5321 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5322 : (errmsg("corrupted statistics file \"%s\"", statfile)));
5323 0 : FreeFile(fpin);
5324 0 : return false;
5325 : }
5326 :
5327 : /*
5328 : * Read archiver stats struct
5329 : */
5330 1258 : if (fread(&myArchiverStats, 1, sizeof(myArchiverStats),
5331 : fpin) != sizeof(myArchiverStats))
5332 : {
5333 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5334 : (errmsg("corrupted statistics file \"%s\"", statfile)));
5335 0 : FreeFile(fpin);
5336 0 : return false;
5337 : }
5338 :
5339 : /* By default, we're going to return the timestamp of the global file. */
5340 1258 : *ts = myGlobalStats.stats_timestamp;
5341 :
5342 : /*
5343 : * We found an existing collector stats file. Read it and look for a
5344 : * record for the requested database. If found, use its timestamp.
5345 : */
5346 : for (;;)
5347 : {
5348 2484 : switch (fgetc(fpin))
5349 : {
5350 : /*
5351 : * 'D' A PgStat_StatDBEntry struct describing a database
5352 : * follows.
5353 : */
5354 : case 'D':
5355 2481 : if (fread(&dbentry, 1, offsetof(PgStat_StatDBEntry, tables),
5356 : fpin) != offsetof(PgStat_StatDBEntry, tables))
5357 : {
5358 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5359 : (errmsg("corrupted statistics file \"%s\"",
5360 : statfile)));
5361 0 : goto done;
5362 : }
5363 :
5364 : /*
5365 : * If this is the DB we're looking for, save its timestamp and
5366 : * we're done.
5367 : */
5368 2481 : if (dbentry.databaseid == databaseid)
5369 : {
5370 1255 : *ts = dbentry.stats_timestamp;
5371 1255 : goto done;
5372 : }
5373 :
5374 1226 : break;
5375 :
5376 : case 'E':
5377 3 : goto done;
5378 :
5379 : default:
5380 0 : ereport(pgStatRunningInCollector ? LOG : WARNING,
5381 : (errmsg("corrupted statistics file \"%s\"",
5382 : statfile)));
5383 0 : goto done;
5384 : }
5385 1226 : }
5386 :
5387 : done:
5388 1258 : FreeFile(fpin);
5389 1258 : return true;
5390 : }
5391 :
5392 : /*
5393 : * If not already done, read the statistics collector stats file into
5394 : * some hash tables. The results will be kept until pgstat_clear_snapshot()
5395 : * is called (typically, at end of transaction).
5396 : */
5397 : static void
5398 267 : backend_read_statsfile(void)
5399 : {
5400 267 : TimestampTz min_ts = 0;
5401 267 : TimestampTz ref_ts = 0;
5402 : Oid inquiry_db;
5403 : int count;
5404 :
5405 : /* already read it? */
5406 267 : if (pgStatDBHash)
5407 425 : return;
5408 109 : Assert(!pgStatRunningInCollector);
5409 :
5410 : /*
5411 : * In a normal backend, we check staleness of the data for our own DB, and
5412 : * so we send MyDatabaseId in inquiry messages. In the autovac launcher,
5413 : * check staleness of the shared-catalog data, and send InvalidOid in
5414 : * inquiry messages so as not to force writing unnecessary data.
5415 : */
5416 109 : if (IsAutoVacuumLauncherProcess())
5417 6 : inquiry_db = InvalidOid;
5418 : else
5419 103 : inquiry_db = MyDatabaseId;
5420 :
5421 : /*
5422 : * Loop until fresh enough stats file is available or we ran out of time.
5423 : * The stats inquiry message is sent repeatedly in case collector drops
5424 : * it; but not every single time, as that just swamps the collector.
5425 : */
5426 2524 : for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
5427 : {
5428 : bool ok;
5429 1262 : TimestampTz file_ts = 0;
5430 : TimestampTz cur_ts;
5431 :
5432 1262 : CHECK_FOR_INTERRUPTS();
5433 :
5434 1262 : ok = pgstat_read_db_statsfile_timestamp(inquiry_db, false, &file_ts);
5435 :
5436 1262 : cur_ts = GetCurrentTimestamp();
5437 : /* Calculate min acceptable timestamp, if we didn't already */
5438 1262 : if (count == 0 || cur_ts < ref_ts)
5439 : {
5440 : /*
5441 : * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL
5442 : * msec before now. This indirectly ensures that the collector
5443 : * needn't write the file more often than PGSTAT_STAT_INTERVAL. In
5444 : * an autovacuum worker, however, we want a lower delay to avoid
5445 : * using stale data, so we use PGSTAT_RETRY_DELAY (since the
5446 : * number of workers is low, this shouldn't be a problem).
5447 : *
5448 : * We don't recompute min_ts after sleeping, except in the
5449 : * unlikely case that cur_ts went backwards. So we might end up
5450 : * accepting a file a bit older than PGSTAT_STAT_INTERVAL. In
5451 : * practice that shouldn't happen, though, as long as the sleep
5452 : * time is less than PGSTAT_STAT_INTERVAL; and we don't want to
5453 : * tell the collector that our cutoff time is less than what we'd
5454 : * actually accept.
5455 : */
5456 109 : ref_ts = cur_ts;
5457 109 : if (IsAutoVacuumWorkerProcess())
5458 49 : min_ts = TimestampTzPlusMilliseconds(ref_ts,
5459 : -PGSTAT_RETRY_DELAY);
5460 : else
5461 60 : min_ts = TimestampTzPlusMilliseconds(ref_ts,
5462 : -PGSTAT_STAT_INTERVAL);
5463 : }
5464 :
5465 : /*
5466 : * If the file timestamp is actually newer than cur_ts, we must have
5467 : * had a clock glitch (system time went backwards) or there is clock
5468 : * skew between our processor and the stats collector's processor.
5469 : * Accept the file, but send an inquiry message anyway to make
5470 : * pgstat_recv_inquiry do a sanity check on the collector's time.
5471 : */
5472 1262 : if (ok && file_ts > cur_ts)
5473 : {
5474 : /*
5475 : * A small amount of clock skew between processors isn't terribly
5476 : * surprising, but a large difference is worth logging. We
5477 : * arbitrarily define "large" as 1000 msec.
5478 : */
5479 0 : if (file_ts >= TimestampTzPlusMilliseconds(cur_ts, 1000))
5480 : {
5481 : char *filetime;
5482 : char *mytime;
5483 :
5484 : /* Copy because timestamptz_to_str returns a static buffer */
5485 0 : filetime = pstrdup(timestamptz_to_str(file_ts));
5486 0 : mytime = pstrdup(timestamptz_to_str(cur_ts));
5487 0 : elog(LOG, "stats collector's time %s is later than backend local time %s",
5488 : filetime, mytime);
5489 0 : pfree(filetime);
5490 0 : pfree(mytime);
5491 : }
5492 :
5493 0 : pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
5494 0 : break;
5495 : }
5496 :
5497 : /* Normal acceptance case: file is not older than cutoff time */
5498 1262 : if (ok && file_ts >= min_ts)
5499 109 : break;
5500 :
5501 : /* Not there or too old, so kick the collector and wait a bit */
5502 1153 : if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
5503 83 : pgstat_send_inquiry(cur_ts, min_ts, inquiry_db);
5504 :
5505 1153 : pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
5506 : }
5507 :
5508 109 : if (count >= PGSTAT_POLL_LOOP_COUNT)
5509 0 : ereport(LOG,
5510 : (errmsg("using stale statistics instead of current ones "
5511 : "because stats collector is not responding")));
5512 :
5513 : /*
5514 : * Autovacuum launcher wants stats about all databases, but a shallow read
5515 : * is sufficient. Regular backends want a deep read for just the tables
5516 : * they can see (MyDatabaseId + shared catalogs).
5517 : */
5518 109 : if (IsAutoVacuumLauncherProcess())
5519 6 : pgStatDBHash = pgstat_read_statsfiles(InvalidOid, false, false);
5520 : else
5521 103 : pgStatDBHash = pgstat_read_statsfiles(MyDatabaseId, false, true);
5522 : }
5523 :
5524 :
5525 : /* ----------
5526 : * pgstat_setup_memcxt() -
5527 : *
5528 : * Create pgStatLocalContext, if not already done.
5529 : * ----------
5530 : */
5531 : static void
5532 110 : pgstat_setup_memcxt(void)
5533 : {
5534 110 : if (!pgStatLocalContext)
5535 110 : pgStatLocalContext = AllocSetContextCreate(TopMemoryContext,
5536 : "Statistics snapshot",
5537 : ALLOCSET_SMALL_SIZES);
5538 110 : }
5539 :
5540 :
5541 : /* ----------
5542 : * pgstat_clear_snapshot() -
5543 : *
5544 : * Discard any data collected in the current transaction. Any subsequent
5545 : * request will cause new snapshots to be read.
5546 : *
5547 : * This is also invoked during transaction commit or abort to discard
5548 : * the no-longer-wanted snapshot.
5549 : * ----------
5550 : */
5551 : void
5552 26278 : pgstat_clear_snapshot(void)
5553 : {
5554 : /* Release memory, if any was allocated */
5555 26278 : if (pgStatLocalContext)
5556 109 : MemoryContextDelete(pgStatLocalContext);
5557 :
5558 : /* Reset variables */
5559 26278 : pgStatLocalContext = NULL;
5560 26278 : pgStatDBHash = NULL;
5561 26278 : localBackendStatusTable = NULL;
5562 26278 : localNumBackends = 0;
5563 26278 : }
5564 :
5565 :
5566 : /* ----------
5567 : * pgstat_recv_inquiry() -
5568 : *
5569 : * Process stat inquiry requests.
5570 : * ----------
5571 : */
5572 : static void
5573 82 : pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
5574 : {
5575 : PgStat_StatDBEntry *dbentry;
5576 :
5577 82 : elog(DEBUG2, "received inquiry for database %u", msg->databaseid);
5578 :
5579 : /*
5580 : * If there's already a write request for this DB, there's nothing to do.
5581 : *
5582 : * Note that if a request is found, we return early and skip the below
5583 : * check for clock skew. This is okay, since the only way for a DB
5584 : * request to be present in the list is that we have been here since the
5585 : * last write round. It seems sufficient to check for clock skew once per
5586 : * write round.
5587 : */
5588 82 : if (list_member_oid(pending_write_requests, msg->databaseid))
5589 0 : return;
5590 :
5591 : /*
5592 : * Check to see if we last wrote this database at a time >= the requested
5593 : * cutoff time. If so, this is a stale request that was generated before
5594 : * we updated the DB file, and we don't need to do so again.
5595 : *
5596 : * If the requestor's local clock time is older than stats_timestamp, we
5597 : * should suspect a clock glitch, ie system time going backwards; though
5598 : * the more likely explanation is just delayed message receipt. It is
5599 : * worth expending a GetCurrentTimestamp call to be sure, since a large
5600 : * retreat in the system clock reading could otherwise cause us to neglect
5601 : * to update the stats file for a long time.
5602 : */
5603 82 : dbentry = pgstat_get_db_entry(msg->databaseid, false);
5604 82 : if (dbentry == NULL)
5605 : {
5606 : /*
5607 : * We have no data for this DB. Enter a write request anyway so that
5608 : * the global stats will get updated. This is needed to prevent
5609 : * backend_read_statsfile from waiting for data that we cannot supply,
5610 : * in the case of a new DB that nobody has yet reported any stats for.
5611 : * See the behavior of pgstat_read_db_statsfile_timestamp.
5612 : */
5613 : }
5614 81 : else if (msg->clock_time < dbentry->stats_timestamp)
5615 : {
5616 4 : TimestampTz cur_ts = GetCurrentTimestamp();
5617 :
5618 4 : if (cur_ts < dbentry->stats_timestamp)
5619 : {
5620 : /*
5621 : * Sure enough, time went backwards. Force a new stats file write
5622 : * to get back in sync; but first, log a complaint.
5623 : */
5624 : char *writetime;
5625 : char *mytime;
5626 :
5627 : /* Copy because timestamptz_to_str returns a static buffer */
5628 0 : writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
5629 0 : mytime = pstrdup(timestamptz_to_str(cur_ts));
5630 0 : elog(LOG,
5631 : "stats_timestamp %s is later than collector's time %s for database %u",
5632 : writetime, mytime, dbentry->databaseid);
5633 0 : pfree(writetime);
5634 0 : pfree(mytime);
5635 : }
5636 : else
5637 : {
5638 : /*
5639 : * Nope, it's just an old request. Assuming msg's clock_time is
5640 : * >= its cutoff_time, it must be stale, so we can ignore it.
5641 : */
5642 4 : return;
5643 : }
5644 : }
5645 77 : else if (msg->cutoff_time <= dbentry->stats_timestamp)
5646 : {
5647 : /* Stale request, ignore it */
5648 8 : return;
5649 : }
5650 :
5651 : /*
5652 : * We need to write this DB, so create a request.
5653 : */
5654 70 : pending_write_requests = lappend_oid(pending_write_requests,
5655 : msg->databaseid);
5656 : }
5657 :
5658 :
5659 : /* ----------
5660 : * pgstat_recv_tabstat() -
5661 : *
5662 : * Count what the backend has done.
5663 : * ----------
5664 : */
5665 : static void
5666 3984 : pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
5667 : {
5668 : PgStat_StatDBEntry *dbentry;
5669 : PgStat_StatTabEntry *tabentry;
5670 : int i;
5671 : bool found;
5672 :
5673 3984 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5674 :
5675 : /*
5676 : * Update database-wide stats.
5677 : */
5678 3984 : dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
5679 3984 : dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
5680 3984 : dbentry->n_block_read_time += msg->m_block_read_time;
5681 3984 : dbentry->n_block_write_time += msg->m_block_write_time;
5682 :
5683 : /*
5684 : * Process all table entries in the message.
5685 : */
5686 33843 : for (i = 0; i < msg->m_nentries; i++)
5687 : {
5688 29859 : PgStat_TableEntry *tabmsg = &(msg->m_entry[i]);
5689 :
5690 29859 : tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
5691 29859 : (void *) &(tabmsg->t_id),
5692 : HASH_ENTER, &found);
5693 :
5694 29859 : if (!found)
5695 : {
5696 : /*
5697 : * If it's a new table entry, initialize counters to the values we
5698 : * just got.
5699 : */
5700 2492 : tabentry->numscans = tabmsg->t_counts.t_numscans;
5701 2492 : tabentry->tuples_returned = tabmsg->t_counts.t_tuples_returned;
5702 2492 : tabentry->tuples_fetched = tabmsg->t_counts.t_tuples_fetched;
5703 2492 : tabentry->tuples_inserted = tabmsg->t_counts.t_tuples_inserted;
5704 2492 : tabentry->tuples_updated = tabmsg->t_counts.t_tuples_updated;
5705 2492 : tabentry->tuples_deleted = tabmsg->t_counts.t_tuples_deleted;
5706 2492 : tabentry->tuples_hot_updated = tabmsg->t_counts.t_tuples_hot_updated;
5707 2492 : tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
5708 2492 : tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
5709 2492 : tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
5710 2492 : tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
5711 2492 : tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
5712 :
5713 2492 : tabentry->vacuum_timestamp = 0;
5714 2492 : tabentry->vacuum_count = 0;
5715 2492 : tabentry->autovac_vacuum_timestamp = 0;
5716 2492 : tabentry->autovac_vacuum_count = 0;
5717 2492 : tabentry->analyze_timestamp = 0;
5718 2492 : tabentry->analyze_count = 0;
5719 2492 : tabentry->autovac_analyze_timestamp = 0;
5720 2492 : tabentry->autovac_analyze_count = 0;
5721 : }
5722 : else
5723 : {
5724 : /*
5725 : * Otherwise add the values to the existing entry.
5726 : */
5727 27367 : tabentry->numscans += tabmsg->t_counts.t_numscans;
5728 27367 : tabentry->tuples_returned += tabmsg->t_counts.t_tuples_returned;
5729 27367 : tabentry->tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5730 27367 : tabentry->tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5731 27367 : tabentry->tuples_updated += tabmsg->t_counts.t_tuples_updated;
5732 27367 : tabentry->tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5733 27367 : tabentry->tuples_hot_updated += tabmsg->t_counts.t_tuples_hot_updated;
5734 : /* If table was truncated, first reset the live/dead counters */
5735 27367 : if (tabmsg->t_counts.t_truncated)
5736 : {
5737 10 : tabentry->n_live_tuples = 0;
5738 10 : tabentry->n_dead_tuples = 0;
5739 : }
5740 27367 : tabentry->n_live_tuples += tabmsg->t_counts.t_delta_live_tuples;
5741 27367 : tabentry->n_dead_tuples += tabmsg->t_counts.t_delta_dead_tuples;
5742 27367 : tabentry->changes_since_analyze += tabmsg->t_counts.t_changed_tuples;
5743 27367 : tabentry->blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5744 27367 : tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
5745 : }
5746 :
5747 : /* Clamp n_live_tuples in case of negative delta_live_tuples */
5748 29859 : tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
5749 : /* Likewise for n_dead_tuples */
5750 29859 : tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);
5751 :
5752 : /*
5753 : * Add per-table stats to the per-database entry, too.
5754 : */
5755 29859 : dbentry->n_tuples_returned += tabmsg->t_counts.t_tuples_returned;
5756 29859 : dbentry->n_tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
5757 29859 : dbentry->n_tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
5758 29859 : dbentry->n_tuples_updated += tabmsg->t_counts.t_tuples_updated;
5759 29859 : dbentry->n_tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
5760 29859 : dbentry->n_blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
5761 29859 : dbentry->n_blocks_hit += tabmsg->t_counts.t_blocks_hit;
5762 : }
5763 3984 : }
5764 :
5765 :
5766 : /* ----------
5767 : * pgstat_recv_tabpurge() -
5768 : *
5769 : * Arrange for dead table removal.
5770 : * ----------
5771 : */
5772 : static void
5773 47 : pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
5774 : {
5775 : PgStat_StatDBEntry *dbentry;
5776 : int i;
5777 :
5778 47 : dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5779 :
5780 : /*
5781 : * No need to purge if we don't even know the database.
5782 : */
5783 47 : if (!dbentry || !dbentry->tables)
5784 47 : return;
5785 :
5786 : /*
5787 : * Process all table entries in the message.
5788 : */
5789 3528 : for (i = 0; i < msg->m_nentries; i++)
5790 : {
5791 : /* Remove from hashtable if present; we don't care if it's not. */
5792 3481 : (void) hash_search(dbentry->tables,
5793 3481 : (void *) &(msg->m_tableid[i]),
5794 : HASH_REMOVE, NULL);
5795 : }
5796 : }
5797 :
5798 :
5799 : /* ----------
5800 : * pgstat_recv_dropdb() -
5801 : *
5802 : * Arrange for dead database removal
5803 : * ----------
5804 : */
5805 : static void
5806 0 : pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
5807 : {
5808 0 : Oid dbid = msg->m_databaseid;
5809 : PgStat_StatDBEntry *dbentry;
5810 :
5811 : /*
5812 : * Lookup the database in the hashtable.
5813 : */
5814 0 : dbentry = pgstat_get_db_entry(dbid, false);
5815 :
5816 : /*
5817 : * If found, remove it (along with the db statfile).
5818 : */
5819 0 : if (dbentry)
5820 : {
5821 : char statfile[MAXPGPATH];
5822 :
5823 0 : get_dbstat_filename(false, false, dbid, statfile, MAXPGPATH);
5824 :
5825 0 : elog(DEBUG2, "removing stats file \"%s\"", statfile);
5826 0 : unlink(statfile);
5827 :
5828 0 : if (dbentry->tables != NULL)
5829 0 : hash_destroy(dbentry->tables);
5830 0 : if (dbentry->functions != NULL)
5831 0 : hash_destroy(dbentry->functions);
5832 :
5833 0 : if (hash_search(pgStatDBHash,
5834 : (void *) &dbid,
5835 : HASH_REMOVE, NULL) == NULL)
5836 0 : ereport(ERROR,
5837 : (errmsg("database hash table corrupted during cleanup --- abort")));
5838 : }
5839 0 : }
5840 :
5841 :
5842 : /* ----------
5843 : * pgstat_recv_resetcounter() -
5844 : *
5845 : * Reset the statistics for the specified database.
5846 : * ----------
5847 : */
5848 : static void
5849 0 : pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
5850 : {
5851 : PgStat_StatDBEntry *dbentry;
5852 :
5853 : /*
5854 : * Lookup the database in the hashtable. Nothing to do if not there.
5855 : */
5856 0 : dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5857 :
5858 0 : if (!dbentry)
5859 0 : return;
5860 :
5861 : /*
5862 : * We simply throw away all the database's table entries by recreating a
5863 : * new hash table for them.
5864 : */
5865 0 : if (dbentry->tables != NULL)
5866 0 : hash_destroy(dbentry->tables);
5867 0 : if (dbentry->functions != NULL)
5868 0 : hash_destroy(dbentry->functions);
5869 :
5870 0 : dbentry->tables = NULL;
5871 0 : dbentry->functions = NULL;
5872 :
5873 : /*
5874 : * Reset database-level stats, too. This creates empty hash tables for
5875 : * tables and functions.
5876 : */
5877 0 : reset_dbentry_counters(dbentry);
5878 : }
5879 :
5880 : /* ----------
5881 : * pgstat_recv_resetshared() -
5882 : *
5883 : * Reset some shared statistics of the cluster.
5884 : * ----------
5885 : */
5886 : static void
5887 0 : pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
5888 : {
5889 0 : if (msg->m_resettarget == RESET_BGWRITER)
5890 : {
5891 : /* Reset the global background writer statistics for the cluster. */
5892 0 : memset(&globalStats, 0, sizeof(globalStats));
5893 0 : globalStats.stat_reset_timestamp = GetCurrentTimestamp();
5894 : }
5895 0 : else if (msg->m_resettarget == RESET_ARCHIVER)
5896 : {
5897 : /* Reset the archiver statistics for the cluster. */
5898 0 : memset(&archiverStats, 0, sizeof(archiverStats));
5899 0 : archiverStats.stat_reset_timestamp = GetCurrentTimestamp();
5900 : }
5901 :
5902 : /*
5903 : * Presumably the sender of this message validated the target, don't
5904 : * complain here if it's not valid
5905 : */
5906 0 : }
5907 :
5908 : /* ----------
5909 : * pgstat_recv_resetsinglecounter() -
5910 : *
5911 : * Reset a statistics for a single object
5912 : * ----------
5913 : */
5914 : static void
5915 0 : pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len)
5916 : {
5917 : PgStat_StatDBEntry *dbentry;
5918 :
5919 0 : dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
5920 :
5921 0 : if (!dbentry)
5922 0 : return;
5923 :
5924 : /* Set the reset timestamp for the whole database */
5925 0 : dbentry->stat_reset_timestamp = GetCurrentTimestamp();
5926 :
5927 : /* Remove object if it exists, ignore it if not */
5928 0 : if (msg->m_resettype == RESET_TABLE)
5929 0 : (void) hash_search(dbentry->tables, (void *) &(msg->m_objectid),
5930 : HASH_REMOVE, NULL);
5931 0 : else if (msg->m_resettype == RESET_FUNCTION)
5932 0 : (void) hash_search(dbentry->functions, (void *) &(msg->m_objectid),
5933 : HASH_REMOVE, NULL);
5934 : }
5935 :
5936 : /* ----------
5937 : * pgstat_recv_autovac() -
5938 : *
5939 : * Process an autovacuum signalling message.
5940 : * ----------
5941 : */
5942 : static void
5943 3 : pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len)
5944 : {
5945 : PgStat_StatDBEntry *dbentry;
5946 :
5947 : /*
5948 : * Store the last autovacuum time in the database's hashtable entry.
5949 : */
5950 3 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5951 :
5952 3 : dbentry->last_autovac_time = msg->m_start_time;
5953 3 : }
5954 :
5955 : /* ----------
5956 : * pgstat_recv_vacuum() -
5957 : *
5958 : * Process a VACUUM message.
5959 : * ----------
5960 : */
5961 : static void
5962 299 : pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
5963 : {
5964 : PgStat_StatDBEntry *dbentry;
5965 : PgStat_StatTabEntry *tabentry;
5966 :
5967 : /*
5968 : * Store the data in the table's hashtable entry.
5969 : */
5970 299 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
5971 :
5972 299 : tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
5973 :
5974 299 : tabentry->n_live_tuples = msg->m_live_tuples;
5975 299 : tabentry->n_dead_tuples = msg->m_dead_tuples;
5976 :
5977 299 : if (msg->m_autovacuum)
5978 : {
5979 13 : tabentry->autovac_vacuum_timestamp = msg->m_vacuumtime;
5980 13 : tabentry->autovac_vacuum_count++;
5981 : }
5982 : else
5983 : {
5984 286 : tabentry->vacuum_timestamp = msg->m_vacuumtime;
5985 286 : tabentry->vacuum_count++;
5986 : }
5987 299 : }
5988 :
5989 : /* ----------
5990 : * pgstat_recv_analyze() -
5991 : *
5992 : * Process an ANALYZE message.
5993 : * ----------
5994 : */
5995 : static void
5996 136 : pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
5997 : {
5998 : PgStat_StatDBEntry *dbentry;
5999 : PgStat_StatTabEntry *tabentry;
6000 :
6001 : /*
6002 : * Store the data in the table's hashtable entry.
6003 : */
6004 136 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
6005 :
6006 136 : tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
6007 :
6008 136 : tabentry->n_live_tuples = msg->m_live_tuples;
6009 136 : tabentry->n_dead_tuples = msg->m_dead_tuples;
6010 :
6011 : /*
6012 : * If commanded, reset changes_since_analyze to zero. This forgets any
6013 : * changes that were committed while the ANALYZE was in progress, but we
6014 : * have no good way to estimate how many of those there were.
6015 : */
6016 136 : if (msg->m_resetcounter)
6017 135 : tabentry->changes_since_analyze = 0;
6018 :
6019 136 : if (msg->m_autovacuum)
6020 : {
6021 46 : tabentry->autovac_analyze_timestamp = msg->m_analyzetime;
6022 46 : tabentry->autovac_analyze_count++;
6023 : }
6024 : else
6025 : {
6026 90 : tabentry->analyze_timestamp = msg->m_analyzetime;
6027 90 : tabentry->analyze_count++;
6028 : }
6029 136 : }
6030 :
6031 :
6032 : /* ----------
6033 : * pgstat_recv_archiver() -
6034 : *
6035 : * Process a ARCHIVER message.
6036 : * ----------
6037 : */
6038 : static void
6039 0 : pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len)
6040 : {
6041 0 : if (msg->m_failed)
6042 : {
6043 : /* Failed archival attempt */
6044 0 : ++archiverStats.failed_count;
6045 0 : memcpy(archiverStats.last_failed_wal, msg->m_xlog,
6046 : sizeof(archiverStats.last_failed_wal));
6047 0 : archiverStats.last_failed_timestamp = msg->m_timestamp;
6048 : }
6049 : else
6050 : {
6051 : /* Successful archival operation */
6052 0 : ++archiverStats.archived_count;
6053 0 : memcpy(archiverStats.last_archived_wal, msg->m_xlog,
6054 : sizeof(archiverStats.last_archived_wal));
6055 0 : archiverStats.last_archived_timestamp = msg->m_timestamp;
6056 : }
6057 0 : }
6058 :
6059 : /* ----------
6060 : * pgstat_recv_bgwriter() -
6061 : *
6062 : * Process a BGWRITER message.
6063 : * ----------
6064 : */
6065 : static void
6066 243 : pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
6067 : {
6068 243 : globalStats.timed_checkpoints += msg->m_timed_checkpoints;
6069 243 : globalStats.requested_checkpoints += msg->m_requested_checkpoints;
6070 243 : globalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
6071 243 : globalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
6072 243 : globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
6073 243 : globalStats.buf_written_clean += msg->m_buf_written_clean;
6074 243 : globalStats.maxwritten_clean += msg->m_maxwritten_clean;
6075 243 : globalStats.buf_written_backend += msg->m_buf_written_backend;
6076 243 : globalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
6077 243 : globalStats.buf_alloc += msg->m_buf_alloc;
6078 243 : }
6079 :
6080 : /* ----------
6081 : * pgstat_recv_recoveryconflict() -
6082 : *
6083 : * Process a RECOVERYCONFLICT message.
6084 : * ----------
6085 : */
6086 : static void
6087 0 : pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len)
6088 : {
6089 : PgStat_StatDBEntry *dbentry;
6090 :
6091 0 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
6092 :
6093 0 : switch (msg->m_reason)
6094 : {
6095 : case PROCSIG_RECOVERY_CONFLICT_DATABASE:
6096 :
6097 : /*
6098 : * Since we drop the information about the database as soon as it
6099 : * replicates, there is no point in counting these conflicts.
6100 : */
6101 0 : break;
6102 : case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
6103 0 : dbentry->n_conflict_tablespace++;
6104 0 : break;
6105 : case PROCSIG_RECOVERY_CONFLICT_LOCK:
6106 0 : dbentry->n_conflict_lock++;
6107 0 : break;
6108 : case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
6109 0 : dbentry->n_conflict_snapshot++;
6110 0 : break;
6111 : case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
6112 0 : dbentry->n_conflict_bufferpin++;
6113 0 : break;
6114 : case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
6115 0 : dbentry->n_conflict_startup_deadlock++;
6116 0 : break;
6117 : }
6118 0 : }
6119 :
6120 : /* ----------
6121 : * pgstat_recv_deadlock() -
6122 : *
6123 : * Process a DEADLOCK message.
6124 : * ----------
6125 : */
6126 : static void
6127 0 : pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len)
6128 : {
6129 : PgStat_StatDBEntry *dbentry;
6130 :
6131 0 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
6132 :
6133 0 : dbentry->n_deadlocks++;
6134 0 : }
6135 :
6136 : /* ----------
6137 : * pgstat_recv_tempfile() -
6138 : *
6139 : * Process a TEMPFILE message.
6140 : * ----------
6141 : */
6142 : static void
6143 10 : pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len)
6144 : {
6145 : PgStat_StatDBEntry *dbentry;
6146 :
6147 10 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
6148 :
6149 10 : dbentry->n_temp_bytes += msg->m_filesize;
6150 10 : dbentry->n_temp_files += 1;
6151 10 : }
6152 :
6153 : /* ----------
6154 : * pgstat_recv_funcstat() -
6155 : *
6156 : * Count what the backend has done.
6157 : * ----------
6158 : */
6159 : static void
6160 0 : pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len)
6161 : {
6162 0 : PgStat_FunctionEntry *funcmsg = &(msg->m_entry[0]);
6163 : PgStat_StatDBEntry *dbentry;
6164 : PgStat_StatFuncEntry *funcentry;
6165 : int i;
6166 : bool found;
6167 :
6168 0 : dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
6169 :
6170 : /*
6171 : * Process all function entries in the message.
6172 : */
6173 0 : for (i = 0; i < msg->m_nentries; i++, funcmsg++)
6174 : {
6175 0 : funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
6176 0 : (void *) &(funcmsg->f_id),
6177 : HASH_ENTER, &found);
6178 :
6179 0 : if (!found)
6180 : {
6181 : /*
6182 : * If it's a new function entry, initialize counters to the values
6183 : * we just got.
6184 : */
6185 0 : funcentry->f_numcalls = funcmsg->f_numcalls;
6186 0 : funcentry->f_total_time = funcmsg->f_total_time;
6187 0 : funcentry->f_self_time = funcmsg->f_self_time;
6188 : }
6189 : else
6190 : {
6191 : /*
6192 : * Otherwise add the values to the existing entry.
6193 : */
6194 0 : funcentry->f_numcalls += funcmsg->f_numcalls;
6195 0 : funcentry->f_total_time += funcmsg->f_total_time;
6196 0 : funcentry->f_self_time += funcmsg->f_self_time;
6197 : }
6198 : }
6199 0 : }
6200 :
6201 : /* ----------
6202 : * pgstat_recv_funcpurge() -
6203 : *
6204 : * Arrange for dead function removal.
6205 : * ----------
6206 : */
6207 : static void
6208 0 : pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
6209 : {
6210 : PgStat_StatDBEntry *dbentry;
6211 : int i;
6212 :
6213 0 : dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
6214 :
6215 : /*
6216 : * No need to purge if we don't even know the database.
6217 : */
6218 0 : if (!dbentry || !dbentry->functions)
6219 0 : return;
6220 :
6221 : /*
6222 : * Process all function entries in the message.
6223 : */
6224 0 : for (i = 0; i < msg->m_nentries; i++)
6225 : {
6226 : /* Remove from hashtable if present; we don't care if it's not. */
6227 0 : (void) hash_search(dbentry->functions,
6228 0 : (void *) &(msg->m_functionid[i]),
6229 : HASH_REMOVE, NULL);
6230 : }
6231 : }
6232 :
6233 : /* ----------
6234 : * pgstat_write_statsfile_needed() -
6235 : *
6236 : * Do we need to write out any stats files?
6237 : * ----------
6238 : */
6239 : static bool
6240 6994 : pgstat_write_statsfile_needed(void)
6241 : {
6242 6994 : if (pending_write_requests != NIL)
6243 70 : return true;
6244 :
6245 : /* Everything was written recently */
6246 6924 : return false;
6247 : }
6248 :
6249 : /* ----------
6250 : * pgstat_db_requested() -
6251 : *
6252 : * Checks whether stats for a particular DB need to be written to a file.
6253 : * ----------
6254 : */
6255 : static bool
6256 207 : pgstat_db_requested(Oid databaseid)
6257 : {
6258 : /*
6259 : * If any requests are outstanding at all, we should write the stats for
6260 : * shared catalogs (the "database" with OID 0). This ensures that
6261 : * backends will see up-to-date stats for shared catalogs, even though
6262 : * they send inquiry messages mentioning only their own DB.
6263 : */
6264 207 : if (databaseid == InvalidOid && pending_write_requests != NIL)
6265 69 : return true;
6266 :
6267 : /* Search to see if there's an open request to write this database. */
6268 138 : if (list_member_oid(pending_write_requests, databaseid))
6269 66 : return true;
6270 :
6271 72 : return false;
6272 : }
|