Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dsm.c
4 : * manage dynamic shared memory segments
5 : *
6 : * This file provides a set of services to make programming with dynamic
7 : * shared memory segments more convenient. Unlike the low-level
8 : * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9 : * created using this module will be cleaned up automatically. Mappings
10 : * will be removed when the resource owner under which they were created
11 : * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12 : * have session lifespan. Segments will be removed when there are no
13 : * remaining mappings, or at postmaster shutdown in any case. After a
14 : * hard postmaster crash, remaining segments will be removed, if they
15 : * still exist, at the next postmaster startup.
16 : *
17 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
18 : * Portions Copyright (c) 1994, Regents of the University of California
19 : *
20 : *
21 : * IDENTIFICATION
22 : * src/backend/storage/ipc/dsm.c
23 : *
24 : *-------------------------------------------------------------------------
25 : */
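/*
 * Illustrative sketch, not part of dsm.c: typical creator-side use of the
 * services described above, assuming the usual includes (postgres.h and
 * storage/dsm.h).  The names create_shared_area() and AREA_SIZE are
 * hypothetical.
 */
#define AREA_SIZE ((Size) 65536)	/* example size, chosen arbitrarily */

static dsm_handle
create_shared_area(void)
{
	dsm_segment *seg;

	/* Create and map a new segment; it is owned by CurrentResourceOwner, if set. */
	seg = dsm_create(AREA_SIZE, 0);

	/* Keep the mapping for the whole session instead of just the current query. */
	dsm_pin_mapping(seg);

	/* Hand the handle to cooperating processes, e.g. via bgw_main_arg. */
	return dsm_segment_handle(seg);
}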
26 :
27 : #include "postgres.h"
28 :
29 : #include <fcntl.h>
30 : #include <unistd.h>
31 : #ifndef WIN32
32 : #include <sys/mman.h>
33 : #endif
34 : #include <sys/stat.h>
35 :
36 : #include "lib/ilist.h"
37 : #include "miscadmin.h"
38 : #include "storage/dsm.h"
39 : #include "storage/ipc.h"
40 : #include "storage/lwlock.h"
41 : #include "storage/pg_shmem.h"
42 : #include "utils/guc.h"
43 : #include "utils/memutils.h"
44 : #include "utils/resowner_private.h"
45 :
46 : #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
47 :
48 : /*
49 : * There's no point in getting too cheap here, because the minimum allocation
50 : * is one OS page, which is probably at least 4KB and could easily be as high
51 : * as 64KB. Each slot costs only sizeof(dsm_control_item) bytes.
52 : */
53 : #define PG_DYNSHMEM_FIXED_SLOTS 64
54 : #define PG_DYNSHMEM_SLOTS_PER_BACKEND 2
55 :
56 : #define INVALID_CONTROL_SLOT ((uint32) -1)
57 :
58 : /* Backend-local tracking for on-detach callbacks. */
59 : typedef struct dsm_segment_detach_callback
60 : {
61 : on_dsm_detach_callback function;
62 : Datum arg;
63 : slist_node node;
64 : } dsm_segment_detach_callback;
65 :
66 : /* Backend-local state for a dynamic shared memory segment. */
67 : struct dsm_segment
68 : {
69 : dlist_node node; /* List link in dsm_segment_list. */
70 : ResourceOwner resowner; /* Resource owner. */
71 : dsm_handle handle; /* Segment name. */
72 : uint32 control_slot; /* Slot in control segment. */
73 : void *impl_private; /* Implementation-specific private data. */
74 : void *mapped_address; /* Mapping address, or NULL if unmapped. */
75 : Size mapped_size; /* Size of our mapping. */
76 : slist_head on_detach; /* On-detach callbacks. */
77 : };
78 :
79 : /* Shared-memory state for a dynamic shared memory segment. */
80 : typedef struct dsm_control_item
81 : {
82 : dsm_handle handle;
83 : uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
84 : void *impl_private_pm_handle; /* only needed on Windows */
85 : bool pinned;
86 : } dsm_control_item;
87 :
88 : /* Layout of the dynamic shared memory control segment. */
89 : typedef struct dsm_control_header
90 : {
91 : uint32 magic;
92 : uint32 nitems;
93 : uint32 maxitems;
94 : dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
95 : } dsm_control_header;
96 :
97 : static void dsm_cleanup_for_mmap(void);
98 : static void dsm_postmaster_shutdown(int code, Datum arg);
99 : static dsm_segment *dsm_create_descriptor(void);
100 : static bool dsm_control_segment_sane(dsm_control_header *control,
101 : Size mapped_size);
102 : static uint64 dsm_control_bytes_needed(uint32 nitems);
103 :
104 : /* Has this backend initialized the dynamic shared memory system yet? */
105 : static bool dsm_init_done = false;
106 :
107 : /*
108 : * List of dynamic shared memory segments used by this backend.
109 : *
110 : * At process exit time, we must decrement the reference count of each
111 : * segment we have attached; this list makes it possible to find all such
112 : * segments.
113 : *
114 : * This list should always be empty in the postmaster. We could probably
115 : * allow the postmaster to map dynamic shared memory segments before it
116 : * begins to start child processes, provided that each process adjusted
117 : * the reference counts for those segments in the control segment at
118 : * startup time, but there's no obvious need for such a facility, which
119 : * would also be complex to handle in the EXEC_BACKEND case. Once the
120 : * postmaster has begun spawning children, there's an additional problem:
121 : * each new mapping would require an update to the control segment,
122 : * which requires taking locks, something the postmaster must never do.
123 : */
124 : static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
125 :
126 : /*
127 : * Control segment information.
128 : *
129 : * Unlike ordinary shared memory segments, the control segment is not
130 : * reference counted; instead, it lasts for the postmaster's entire
131 : * life cycle. For simplicity, it doesn't have a dsm_segment object either.
132 : */
133 : static dsm_handle dsm_control_handle;
134 : static dsm_control_header *dsm_control;
135 : static Size dsm_control_mapped_size = 0;
136 : static void *dsm_control_impl_private = NULL;
137 :
138 : /*
139 : * Start up the dynamic shared memory system.
140 : *
141 : * This is called just once per postmaster lifetime, at postmaster
142 : * startup time.
143 : */
144 : void
145 5 : dsm_postmaster_startup(PGShmemHeader *shim)
146 : {
147 5 : void *dsm_control_address = NULL;
148 : uint32 maxitems;
149 : Size segsize;
150 :
151 5 : Assert(!IsUnderPostmaster);
152 :
153 : /* If dynamic shared memory is disabled, there's nothing to do. */
154 5 : if (dynamic_shared_memory_type == DSM_IMPL_NONE)
155 7 : return;
156 :
157 : /*
158 : * If we're using the mmap implementation, clean up any leftovers.
159 : * Cleanup isn't needed on Windows, and happens earlier in startup for
160 : * POSIX and System V shared memory, via a direct call to
161 : * dsm_cleanup_using_control_segment.
162 : */
163 3 : if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
164 0 : dsm_cleanup_for_mmap();
165 :
166 : /* Determine size for new control segment. */
167 3 : maxitems = PG_DYNSHMEM_FIXED_SLOTS
168 3 : + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
169 3 : elog(DEBUG2, "dynamic shared memory system will support %u segments",
170 : maxitems);
171 3 : segsize = dsm_control_bytes_needed(maxitems);
172 :
173 : /*
174 : * Loop until we find an unused identifier for the new control segment. We
175 : * sometimes use 0 as a sentinel value indicating that no control segment
176 : * is known to exist, so avoid using that value for a real control
177 : * segment.
178 : */
179 : for (;;)
180 : {
181 3 : Assert(dsm_control_address == NULL);
182 3 : Assert(dsm_control_mapped_size == 0);
183 3 : dsm_control_handle = random();
184 3 : if (dsm_control_handle == DSM_HANDLE_INVALID)
185 0 : continue;
186 3 : if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
187 : &dsm_control_impl_private, &dsm_control_address,
188 : &dsm_control_mapped_size, ERROR))
189 3 : break;
190 0 : }
191 3 : dsm_control = dsm_control_address;
192 3 : on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
193 3 : elog(DEBUG2,
194 : "created dynamic shared memory control segment %u (%zu bytes)",
195 : dsm_control_handle, segsize);
196 3 : shim->dsm_control = dsm_control_handle;
197 :
198 : /* Initialize control segment. */
199 3 : dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
200 3 : dsm_control->nitems = 0;
201 3 : dsm_control->maxitems = maxitems;
202 : }
203 :
204 : /*
205 : * Determine whether the control segment from the previous postmaster
206 : * invocation still exists. If so, remove the dynamic shared memory
207 : * segments to which it refers, and then the control segment itself.
208 : */
209 : void
210 0 : dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
211 : {
212 0 : void *mapped_address = NULL;
213 0 : void *junk_mapped_address = NULL;
214 0 : void *impl_private = NULL;
215 0 : void *junk_impl_private = NULL;
216 0 : Size mapped_size = 0;
217 0 : Size junk_mapped_size = 0;
218 : uint32 nitems;
219 : uint32 i;
220 : dsm_control_header *old_control;
221 :
222 : /* If dynamic shared memory is disabled, there's nothing to do. */
223 0 : if (dynamic_shared_memory_type == DSM_IMPL_NONE)
224 0 : return;
225 :
226 : /*
227 : * Try to attach the segment. If this fails, it probably just means that
228 : * the operating system has been rebooted and the segment no longer
229 : * exists, or an unrelated process has used the same shm ID. So just fall
230 : * out quietly.
231 : */
232 0 : if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
233 : &mapped_address, &mapped_size, DEBUG1))
234 0 : return;
235 :
236 : /*
237 : * We've managed to reattach it, but the contents might not be sane. If
238 : * they aren't, we disregard the segment after all.
239 : */
240 0 : old_control = (dsm_control_header *) mapped_address;
241 0 : if (!dsm_control_segment_sane(old_control, mapped_size))
242 : {
243 0 : dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
244 : &mapped_address, &mapped_size, LOG);
245 0 : return;
246 : }
247 :
248 : /*
249 : * OK, the control segment looks basically valid, so we can use it to get
250 : * a list of segments that need to be removed.
251 : */
252 0 : nitems = old_control->nitems;
253 0 : for (i = 0; i < nitems; ++i)
254 : {
255 : dsm_handle handle;
256 : uint32 refcnt;
257 :
258 : /* If the reference count is 0, the slot is actually unused. */
259 0 : refcnt = old_control->item[i].refcnt;
260 0 : if (refcnt == 0)
261 0 : continue;
262 :
263 : /* Log debugging information. */
264 0 : handle = old_control->item[i].handle;
265 0 : elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
266 : handle, refcnt);
267 :
268 : /* Destroy the referenced segment. */
269 0 : dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
270 : &junk_mapped_address, &junk_mapped_size, LOG);
271 : }
272 :
273 : /* Destroy the old control segment, too. */
274 0 : elog(DEBUG2,
275 : "cleaning up dynamic shared memory control segment with ID %u",
276 : old_control_handle);
277 0 : dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
278 : &mapped_address, &mapped_size, LOG);
279 : }
280 :
281 : /*
282 : * When we're using the mmap shared memory implementation, "shared memory"
283 : * segments might even manage to survive an operating system reboot.
284 : * But there's no guarantee as to exactly what will survive: some segments
285 : * may survive, and others may not, and the contents of some may be out
286 : * of date. In particular, the control segment may be out of date, so we
287 : * can't rely on it to figure out what to remove. However, since we know
288 : * what directory contains the files we used as shared memory, we can simply
289 : * scan the directory and blow everything away that shouldn't be there.
290 : */
291 : static void
292 0 : dsm_cleanup_for_mmap(void)
293 : {
294 : DIR *dir;
295 : struct dirent *dent;
296 :
297 : /* Open the directory holding the mmap'd shared memory files. */
298 0 : if ((dir = AllocateDir(PG_DYNSHMEM_DIR)) == NULL)
299 0 : ereport(ERROR,
300 : (errcode_for_file_access(),
301 : errmsg("could not open directory \"%s\": %m",
302 : PG_DYNSHMEM_DIR)));
303 :
304 : /* Scan for something with a name of the correct format. */
305 0 : while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
306 : {
307 0 : if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
308 : strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
309 : {
310 : char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
311 :
312 0 : snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
313 :
314 0 : elog(DEBUG2, "removing file \"%s\"", buf);
315 :
316 : /* We found a matching file, so remove it. */
317 0 : if (unlink(buf) != 0)
318 : {
319 : int save_errno;
320 :
321 0 : save_errno = errno;
322 0 : closedir(dir);
323 0 : errno = save_errno;
324 :
325 0 : ereport(ERROR,
326 : (errcode_for_file_access(),
327 : errmsg("could not remove file \"%s\": %m", buf)));
328 : }
329 : }
330 : }
331 :
332 : /* Cleanup complete. */
333 0 : FreeDir(dir);
334 0 : }
335 :
336 : /*
337 : * At shutdown time, we iterate over the control segment and remove all
338 : * remaining dynamic shared memory segments. We avoid throwing errors here;
339 : * the postmaster is shutting down either way, and this is just non-critical
340 : * resource cleanup.
341 : */
342 : static void
343 3 : dsm_postmaster_shutdown(int code, Datum arg)
344 : {
345 : uint32 nitems;
346 : uint32 i;
347 : void *dsm_control_address;
348 3 : void *junk_mapped_address = NULL;
349 3 : void *junk_impl_private = NULL;
350 3 : Size junk_mapped_size = 0;
351 3 : PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);
352 :
353 : /*
354 : * If some other backend exited uncleanly, it might have corrupted the
355 : * control segment while it was dying. In that case, we warn and ignore
356 : * the contents of the control segment. This may end up leaving behind
357 : * stray shared memory segments, but there's not much we can do about that
358 : * if the metadata is gone.
359 : */
360 3 : nitems = dsm_control->nitems;
361 3 : if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
362 : {
363 0 : ereport(LOG,
364 : (errmsg("dynamic shared memory control segment is corrupt")));
365 3 : return;
366 : }
367 :
368 : /* Remove any remaining segments. */
369 5 : for (i = 0; i < nitems; ++i)
370 : {
371 : dsm_handle handle;
372 :
373 : /* If the reference count is 0, the slot is actually unused. */
374 2 : if (dsm_control->item[i].refcnt == 0)
375 2 : continue;
376 :
377 : /* Log debugging information. */
378 0 : handle = dsm_control->item[i].handle;
379 0 : elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
380 : handle);
381 :
382 : /* Destroy the segment. */
383 0 : dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
384 : &junk_mapped_address, &junk_mapped_size, LOG);
385 : }
386 :
387 : /* Remove the control segment itself. */
388 3 : elog(DEBUG2,
389 : "cleaning up dynamic shared memory control segment with ID %u",
390 : dsm_control_handle);
391 3 : dsm_control_address = dsm_control;
392 3 : dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
393 : &dsm_control_impl_private, &dsm_control_address,
394 : &dsm_control_mapped_size, LOG);
395 3 : dsm_control = dsm_control_address;
396 3 : shim->dsm_control = 0;
397 : }
398 :
399 : /*
400 : * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
401 : * we must reread the state file and map the control segment; in other cases,
402 : * we'll have inherited the postmaster's mapping and global variables.
403 : */
404 : static void
405 116 : dsm_backend_startup(void)
406 : {
407 : /* If dynamic shared memory is disabled, reject this. */
408 116 : if (dynamic_shared_memory_type == DSM_IMPL_NONE)
409 0 : ereport(ERROR,
410 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
411 : errmsg("dynamic shared memory is disabled"),
412 : errhint("Set dynamic_shared_memory_type to a value other than \"none\".")));
413 :
414 : #ifdef EXEC_BACKEND
415 : {
416 : void *control_address = NULL;
417 :
418 : /* Attach control segment. */
419 : Assert(dsm_control_handle != 0);
420 : dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
421 : &dsm_control_impl_private, &control_address,
422 : &dsm_control_mapped_size, ERROR);
423 : dsm_control = control_address;
424 : /* If control segment doesn't look sane, something is badly wrong. */
425 : if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
426 : {
427 : dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
428 : &dsm_control_impl_private, &control_address,
429 : &dsm_control_mapped_size, WARNING);
430 : ereport(FATAL,
431 : (errcode(ERRCODE_INTERNAL_ERROR),
432 : errmsg("dynamic shared memory control segment is not valid")));
433 : }
434 : }
435 : #endif
436 :
437 116 : dsm_init_done = true;
438 116 : }
439 :
440 : #ifdef EXEC_BACKEND
441 : /*
442 : * When running under EXEC_BACKEND, we get a callback here when the main
443 : * shared memory segment is re-attached, so that we can record the control
444 : * handle retrieved from it.
445 : */
446 : void
447 : dsm_set_control_handle(dsm_handle h)
448 : {
449 : Assert(dsm_control_handle == 0 && h != 0);
450 : dsm_control_handle = h;
451 : }
452 : #endif
453 :
454 : /*
455 : * Create a new dynamic shared memory segment.
456 : *
457 : * If there is a non-NULL CurrentResourceOwner, the new segment is associated
458 : * with it and must be detached before the resource owner releases, or a
459 : * warning will be logged. If CurrentResourceOwner is NULL, the segment
460 : * remains attached until explicitely detached or the session ends.
461 : * Creating with a NULL CurrentResourceOwner is equivalent to creating
462 : * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
463 : */
464 : dsm_segment *
465 19 : dsm_create(Size size, int flags)
466 : {
467 : dsm_segment *seg;
468 : uint32 i;
469 : uint32 nitems;
470 :
471 : /* Unsafe in postmaster (and pointless in a stand-alone backend). */
472 19 : Assert(IsUnderPostmaster);
473 :
474 19 : if (!dsm_init_done)
475 1 : dsm_backend_startup();
476 :
477 : /* Create a new segment descriptor. */
478 19 : seg = dsm_create_descriptor();
479 :
480 : /* Loop until we find an unused segment identifier. */
481 : for (;;)
482 : {
483 19 : Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
484 19 : seg->handle = random();
485 19 : if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */
486 0 : continue;
487 19 : if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
488 : &seg->mapped_address, &seg->mapped_size, ERROR))
489 19 : break;
490 0 : }
491 :
492 : /* Lock the control segment so we can register the new segment. */
493 19 : LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
494 :
495 : /* Search the control segment for an unused slot. */
496 19 : nitems = dsm_control->nitems;
497 21 : for (i = 0; i < nitems; ++i)
498 : {
499 19 : if (dsm_control->item[i].refcnt == 0)
500 : {
501 17 : dsm_control->item[i].handle = seg->handle;
502 : /* refcnt of 1 triggers destruction, so start at 2 */
503 17 : dsm_control->item[i].refcnt = 2;
504 17 : dsm_control->item[i].impl_private_pm_handle = NULL;
505 17 : dsm_control->item[i].pinned = false;
506 17 : seg->control_slot = i;
507 17 : LWLockRelease(DynamicSharedMemoryControlLock);
508 17 : return seg;
509 : }
510 : }
511 :
512 : /* Verify that we can support an additional mapping. */
513 2 : if (nitems >= dsm_control->maxitems)
514 : {
515 0 : if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
516 : {
517 0 : LWLockRelease(DynamicSharedMemoryControlLock);
518 0 : dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
519 : &seg->mapped_address, &seg->mapped_size, WARNING);
520 0 : if (seg->resowner != NULL)
521 0 : ResourceOwnerForgetDSM(seg->resowner, seg);
522 0 : dlist_delete(&seg->node);
523 0 : pfree(seg);
524 0 : return NULL;
525 : }
526 0 : ereport(ERROR,
527 : (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
528 : errmsg("too many dynamic shared memory segments")));
529 : }
530 :
531 : /* Enter the handle into a new array slot. */
532 2 : dsm_control->item[nitems].handle = seg->handle;
533 : /* refcnt of 1 triggers destruction, so start at 2 */
534 2 : dsm_control->item[nitems].refcnt = 2;
535 2 : dsm_control->item[nitems].impl_private_pm_handle = NULL;
536 2 : dsm_control->item[nitems].pinned = false;
537 2 : seg->control_slot = nitems;
538 2 : dsm_control->nitems++;
539 2 : LWLockRelease(DynamicSharedMemoryControlLock);
540 :
541 2 : return seg;
542 : }
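/*
 * Illustrative sketch, not part of dsm.c: creating a segment while tolerating
 * slot exhaustion.  With DSM_CREATE_NULL_IF_MAXSEGMENTS, dsm_create() returns
 * NULL instead of raising an error when every control-segment slot is in use,
 * so the caller can fall back to a backend-local strategy.  The name
 * try_create_area() is hypothetical.
 */
static dsm_segment *
try_create_area(Size request_size)
{
	dsm_segment *seg;

	seg = dsm_create(request_size, DSM_CREATE_NULL_IF_MAXSEGMENTS);
	if (seg == NULL)
	{
		/* Too many dynamic shared memory segments; caller must cope. */
		return NULL;
	}

	return seg;
}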
543 :
544 : /*
545 : * Attach a dynamic shared memory segment.
546 : *
547 : * See comments for dsm_segment_handle() for an explanation of how this
548 : * is intended to be used.
549 : *
550 : * This function will return NULL if the segment isn't known to the system.
551 : * This can happen if we're asked to attach the segment, but then everyone
552 : * else detaches it (causing it to be destroyed) before we get around to
553 : * attaching it.
554 : *
555 : * If there is a non-NULL CurrentResourceOwner, the attached segment is
556 : * associated with it and must be detached before the resource owner releases,
557 : * or a warning will be logged. Otherwise the segment remains attached until
558 : * explicitely detached or the session ends. See the note atop dsm_create().
559 : */
560 : dsm_segment *
561 159 : dsm_attach(dsm_handle h)
562 : {
563 : dsm_segment *seg;
564 : dlist_iter iter;
565 : uint32 i;
566 : uint32 nitems;
567 :
568 : /* Unsafe in postmaster (and pointless in a stand-alone backend). */
569 159 : Assert(IsUnderPostmaster);
570 :
571 159 : if (!dsm_init_done)
572 115 : dsm_backend_startup();
573 :
574 : /*
575 : * Since this is just a debugging cross-check, we could leave it out
576 : * altogether, or include it only in assert-enabled builds. But since the
577 : * list of attached segments should normally be very short, let's include
578 : * it always for right now.
579 : *
580 : * If you're hitting this error, you probably want to attempt to find an
581 : * existing mapping via dsm_find_mapping() before calling dsm_attach() to
582 : * create a new one.
583 : */
584 203 : dlist_foreach(iter, &dsm_segment_list)
585 : {
586 44 : seg = dlist_container(dsm_segment, node, iter.cur);
587 44 : if (seg->handle == h)
588 0 : elog(ERROR, "can't attach the same segment more than once");
589 : }
590 :
591 : /* Create a new segment descriptor. */
592 159 : seg = dsm_create_descriptor();
593 159 : seg->handle = h;
594 :
595 : /* Bump reference count for this segment in shared memory. */
596 159 : LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
597 159 : nitems = dsm_control->nitems;
598 203 : for (i = 0; i < nitems; ++i)
599 : {
600 : /* If the reference count is 0, the slot is actually unused. */
601 203 : if (dsm_control->item[i].refcnt == 0)
602 0 : continue;
603 :
604 : /* If the handle doesn't match, it's not the slot we want. */
605 203 : if (dsm_control->item[i].handle != seg->handle)
606 44 : continue;
607 :
608 : /*
609 : * If the reference count is 1, the slot is still in use, but the
610 : * segment is in the process of going away. Treat that as if we
611 : * didn't find a match.
612 : */
613 159 : if (dsm_control->item[i].refcnt == 1)
614 0 : break;
615 :
616 : /* Otherwise we've found a match. */
617 159 : dsm_control->item[i].refcnt++;
618 159 : seg->control_slot = i;
619 159 : break;
620 : }
621 159 : LWLockRelease(DynamicSharedMemoryControlLock);
622 :
623 : /*
624 : * If we didn't find the handle we're looking for in the control segment,
625 : * it probably means that everyone else who had it mapped, including the
626 : * original creator, died before we got to this point. It's up to the
627 : * caller to decide what to do about that.
628 : */
629 159 : if (seg->control_slot == INVALID_CONTROL_SLOT)
630 : {
631 0 : dsm_detach(seg);
632 0 : return NULL;
633 : }
634 :
635 : /* Here's where we actually try to map the segment. */
636 159 : dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
637 : &seg->mapped_address, &seg->mapped_size, ERROR);
638 :
639 159 : return seg;
640 : }
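/*
 * Illustrative sketch, not part of dsm.c: attach-side usage.  The handle is
 * assumed to have been received from the creating backend, for example via
 * bgw_main_arg or a slot in the main shared memory segment.  The name
 * attach_to_area() and the error message wording are only examples.
 */
static void *
attach_to_area(dsm_handle handle)
{
	dsm_segment *seg;

	/* dsm_attach() returns NULL if the segment has already been destroyed. */
	seg = dsm_attach(handle);
	if (seg == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("dynamic shared memory segment is no longer available")));

	/* The caller's shared data structures live at the mapped address. */
	return dsm_segment_address(seg);
}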
641 :
642 : /*
643 : * At backend shutdown time, detach any segments that are still attached.
644 : * (This is similar to dsm_detach_all, except that there's no reason to
645 : * unmap the control segment before exiting, so we don't bother.)
646 : */
647 : void
648 691 : dsm_backend_shutdown(void)
649 : {
650 1497 : while (!dlist_is_empty(&dsm_segment_list))
651 : {
652 : dsm_segment *seg;
653 :
654 115 : seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
655 115 : dsm_detach(seg);
656 : }
657 691 : }
658 :
659 : /*
660 : * Detach all shared memory segments, including the control segments. This
661 : * should be called, along with PGSharedMemoryDetach, in processes that
662 : * might inherit mappings but are not intended to be connected to dynamic
663 : * shared memory.
664 : */
665 : void
666 1 : dsm_detach_all(void)
667 : {
668 1 : void *control_address = dsm_control;
669 :
670 2 : while (!dlist_is_empty(&dsm_segment_list))
671 : {
672 : dsm_segment *seg;
673 :
674 0 : seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
675 0 : dsm_detach(seg);
676 : }
677 :
678 1 : if (control_address != NULL)
679 1 : dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
680 : &dsm_control_impl_private, &control_address,
681 : &dsm_control_mapped_size, ERROR);
682 1 : }
683 :
684 : /*
685 : * Resize an existing shared memory segment.
686 : *
687 : * This may cause the shared memory segment to be remapped at a different
688 : * address. For the caller's convenience, we return the mapped address.
689 : */
690 : void *
691 0 : dsm_resize(dsm_segment *seg, Size size)
692 : {
693 0 : Assert(seg->control_slot != INVALID_CONTROL_SLOT);
694 0 : dsm_impl_op(DSM_OP_RESIZE, seg->handle, size, &seg->impl_private,
695 : &seg->mapped_address, &seg->mapped_size, ERROR);
696 0 : return seg->mapped_address;
697 : }
698 :
699 : /*
700 : * Remap an existing shared memory segment.
701 : *
702 : * This is intended to be used when some other process has extended the
703 : * mapping using dsm_resize(), but we've still only got the initial
704 : * portion mapped. Since this might change the address at which the
705 : * segment is mapped, we return the new mapped address.
706 : */
707 : void *
708 0 : dsm_remap(dsm_segment *seg)
709 : {
710 0 : dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
711 : &seg->mapped_address, &seg->mapped_size, ERROR);
712 :
713 0 : return seg->mapped_address;
714 : }
715 :
716 : /*
717 : * Detach from a shared memory segment, destroying the segment if we
718 : * remove the last reference.
719 : *
720 : * This function should never fail. It will often be invoked when aborting
721 : * a transaction, and a further error won't serve any purpose. It's not a
722 : * complete disaster if we fail to unmap or destroy the segment; it means a
723 : * resource leak, but that doesn't necessarily preclude further operations.
724 : */
725 : void
726 178 : dsm_detach(dsm_segment *seg)
727 : {
728 : /*
729 : * Invoke registered callbacks. Just in case one of those callbacks
730 : * throws a further error that brings us back here, pop the callback
731 : * before invoking it, to avoid infinite error recursion.
732 : */
733 724 : while (!slist_is_empty(&seg->on_detach))
734 : {
735 : slist_node *node;
736 : dsm_segment_detach_callback *cb;
737 : on_dsm_detach_callback function;
738 : Datum arg;
739 :
740 368 : node = slist_pop_head_node(&seg->on_detach);
741 368 : cb = slist_container(dsm_segment_detach_callback, node, node);
742 368 : function = cb->function;
743 368 : arg = cb->arg;
744 368 : pfree(cb);
745 :
746 368 : function(seg, arg);
747 : }
748 :
749 : /*
750 : * Try to remove the mapping, if one exists. Normally, there will be, but
751 : * maybe not, if we failed partway through a create or attach operation.
752 : * We remove the mapping before decrementing the reference count so that
753 : * the process that sees a zero reference count can be certain that no
754 : * remaining mappings exist. Even if this fails, we pretend that it
755 : * works, because retrying is likely to fail in the same way.
756 : */
757 178 : if (seg->mapped_address != NULL)
758 : {
759 178 : dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
760 : &seg->mapped_address, &seg->mapped_size, WARNING);
761 178 : seg->impl_private = NULL;
762 178 : seg->mapped_address = NULL;
763 178 : seg->mapped_size = 0;
764 : }
765 :
766 : /* Reduce reference count, if we previously increased it. */
767 178 : if (seg->control_slot != INVALID_CONTROL_SLOT)
768 : {
769 : uint32 refcnt;
770 178 : uint32 control_slot = seg->control_slot;
771 :
772 178 : LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
773 178 : Assert(dsm_control->item[control_slot].handle == seg->handle);
774 178 : Assert(dsm_control->item[control_slot].refcnt > 1);
775 178 : refcnt = --dsm_control->item[control_slot].refcnt;
776 178 : seg->control_slot = INVALID_CONTROL_SLOT;
777 178 : LWLockRelease(DynamicSharedMemoryControlLock);
778 :
779 : /* If new reference count is 1, try to destroy the segment. */
780 178 : if (refcnt == 1)
781 : {
782 : /* A pinned segment should never reach 1. */
783 17 : Assert(!dsm_control->item[control_slot].pinned);
784 :
785 : /*
786 : * If we fail to destroy the segment here, or are killed before we
787 : * finish doing so, the reference count will remain at 1, which
788 : * will mean that nobody else can attach to the segment. At
789 : * postmaster shutdown time, or when a new postmaster is started
790 : * after a hard kill, another attempt will be made to remove the
791 : * segment.
792 : *
793 : * The main case we're worried about here is being killed by a
794 : * signal before we can finish removing the segment. In that
795 : * case, it's important to be sure that the segment still gets
796 : * removed. If we actually fail to remove the segment for some
797 : * other reason, the postmaster may not have any better luck than
798 : * we did. There's not much we can do about that, though.
799 : */
800 17 : if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
801 : &seg->mapped_address, &seg->mapped_size, WARNING))
802 : {
803 17 : LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
804 17 : Assert(dsm_control->item[control_slot].handle == seg->handle);
805 17 : Assert(dsm_control->item[control_slot].refcnt == 1);
806 17 : dsm_control->item[control_slot].refcnt = 0;
807 17 : LWLockRelease(DynamicSharedMemoryControlLock);
808 : }
809 : }
810 : }
811 :
812 : /* Clean up our remaining backend-private data structures. */
813 178 : if (seg->resowner != NULL)
814 178 : ResourceOwnerForgetDSM(seg->resowner, seg);
815 178 : dlist_delete(&seg->node);
816 178 : pfree(seg);
817 178 : }
818 :
819 : /*
820 : * Keep a dynamic shared memory mapping until end of session.
821 : *
822 : * By default, mappings are owned by the current resource owner, which
823 : * typically means they stick around for the duration of the current query
824 : * only.
825 : */
826 : void
827 0 : dsm_pin_mapping(dsm_segment *seg)
828 : {
829 0 : if (seg->resowner != NULL)
830 : {
831 0 : ResourceOwnerForgetDSM(seg->resowner, seg);
832 0 : seg->resowner = NULL;
833 : }
834 0 : }
835 :
836 : /*
837 : * Arrange to remove a dynamic shared memory mapping at cleanup time.
838 : *
839 : * dsm_pin_mapping() can be used to preserve a mapping for the entire
840 : * lifetime of a process; this function reverses that decision, making
841 : * the segment owned by the current resource owner. This may be useful
842 : * just before performing some operation that will invalidate the segment
843 : * for future use by this backend.
844 : */
845 : void
846 0 : dsm_unpin_mapping(dsm_segment *seg)
847 : {
848 0 : Assert(seg->resowner == NULL);
849 0 : ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
850 0 : seg->resowner = CurrentResourceOwner;
851 0 : ResourceOwnerRememberDSM(seg->resowner, seg);
852 0 : }
853 :
854 : /*
855 : * Keep a dynamic shared memory segment until postmaster shutdown, or until
856 : * dsm_unpin_segment is called.
857 : *
858 : * This function should not be called more than once per segment, unless the
859 : * segment is explicitly unpinned with dsm_unpin_segment in between calls.
860 : *
861 : * Note that this function does not arrange for the current process to
862 : * keep the segment mapped indefinitely; if that behavior is desired,
863 : * dsm_pin_mapping() should be used from each process that needs to
864 : * retain the mapping.
865 : */
866 : void
867 2 : dsm_pin_segment(dsm_segment *seg)
868 : {
869 : void *handle;
870 :
871 : /*
872 : * Bump reference count for this segment in shared memory. This will
873 : * ensure that even if there is no session which is attached to this
874 : * segment, it will remain until postmaster shutdown or an explicit call
875 : * to unpin.
876 : */
877 2 : LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
878 2 : if (dsm_control->item[seg->control_slot].pinned)
879 0 : elog(ERROR, "cannot pin a segment that is already pinned");
880 2 : dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
881 2 : dsm_control->item[seg->control_slot].pinned = true;
882 2 : dsm_control->item[seg->control_slot].refcnt++;
883 2 : dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
884 2 : LWLockRelease(DynamicSharedMemoryControlLock);
885 2 : }
886 :
887 : /*
888 : * Unpin a dynamic shared memory segment that was previously pinned with
889 : * dsm_pin_segment. This function should not be called unless dsm_pin_segment
890 : * was previously called for this segment.
891 : *
892 : * The argument is a dsm_handle rather than a dsm_segment in case you want
893 : * to unpin a segment to which you haven't attached. This turns out to be
894 : * useful if, for example, a reference to one shared memory segment is stored
895 : * within another shared memory segment. You might want to unpin the
896 : * referenced segment before destroying the referencing segment.
897 : */
898 : void
899 2 : dsm_unpin_segment(dsm_handle handle)
900 : {
901 2 : uint32 control_slot = INVALID_CONTROL_SLOT;
902 2 : bool destroy = false;
903 : uint32 i;
904 :
905 : /* Find the control slot for the given handle. */
906 2 : LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
907 4 : for (i = 0; i < dsm_control->nitems; ++i)
908 : {
909 : /* Skip unused slots. */
910 4 : if (dsm_control->item[i].refcnt == 0)
911 0 : continue;
912 :
913 : /* If we've found our handle, we can stop searching. */
914 4 : if (dsm_control->item[i].handle == handle)
915 : {
916 2 : control_slot = i;
917 2 : break;
918 : }
919 : }
920 :
921 : /*
922 : * We should definitely have found the slot, and it should not already be
923 : * in the process of going away, because this function should only be
924 : * called on a segment which is pinned.
925 : */
926 2 : if (control_slot == INVALID_CONTROL_SLOT)
927 0 : elog(ERROR, "cannot unpin unknown segment handle");
928 2 : if (!dsm_control->item[control_slot].pinned)
929 0 : elog(ERROR, "cannot unpin a segment that is not pinned");
930 2 : Assert(dsm_control->item[control_slot].refcnt > 1);
931 :
932 : /*
933 : * Allow implementation-specific code to run. We have to do this before
934 : * releasing the lock, because impl_private_pm_handle may get modified by
935 : * dsm_impl_unpin_segment.
936 : */
937 2 : dsm_impl_unpin_segment(handle,
938 2 : &dsm_control->item[control_slot].impl_private_pm_handle);
939 :
940 : /* Note that 1 means no references (0 means unused slot). */
941 2 : if (--dsm_control->item[control_slot].refcnt == 1)
942 2 : destroy = true;
943 2 : dsm_control->item[control_slot].pinned = false;
944 :
945 : /* Now we can release the lock. */
946 2 : LWLockRelease(DynamicSharedMemoryControlLock);
947 :
948 : /* Clean up resources if that was the last reference. */
949 2 : if (destroy)
950 : {
951 2 : void *junk_impl_private = NULL;
952 2 : void *junk_mapped_address = NULL;
953 2 : Size junk_mapped_size = 0;
954 :
955 : /*
956 : * For an explanation of how error handling works in this case, see
957 : * comments in dsm_detach. Note that if we reach this point, the
958 : * current process certainly does not have the segment mapped, because
959 : * if it did, the reference count would have still been greater than 1
959 : * even after releasing the reference held by the pin. The fact
961 : * that there can't be a dsm_segment for this handle makes it OK to
962 : * pass the mapped size, mapped address, and private data as NULL
963 : * here.
964 : */
965 2 : if (dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
966 : &junk_mapped_address, &junk_mapped_size, WARNING))
967 : {
968 2 : LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
969 2 : Assert(dsm_control->item[control_slot].handle == handle);
970 2 : Assert(dsm_control->item[control_slot].refcnt == 1);
971 2 : dsm_control->item[control_slot].refcnt = 0;
972 2 : LWLockRelease(DynamicSharedMemoryControlLock);
973 : }
974 : }
975 2 : }
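/*
 * Illustrative sketch, not part of dsm.c: keeping a segment alive even while
 * no backend has it mapped, as long-lived shared state whose handle is stored
 * somewhere durable (for example in the main shared memory segment) might
 * require.  The names saved_handle, publish_area(), and retire_area() are
 * hypothetical.
 */
static dsm_handle saved_handle = DSM_HANDLE_INVALID;

static void
publish_area(dsm_segment *seg)
{
	/* Hold an extra reference until postmaster shutdown or an explicit unpin. */
	dsm_pin_segment(seg);
	saved_handle = dsm_segment_handle(seg);
}

static void
retire_area(void)
{
	/* Drop the pin; the segment is destroyed once no process has it mapped. */
	dsm_unpin_segment(saved_handle);
	saved_handle = DSM_HANDLE_INVALID;
}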
976 :
977 : /*
978 : * Find an existing mapping for a shared memory segment, if there is one.
979 : */
980 : dsm_segment *
981 0 : dsm_find_mapping(dsm_handle h)
982 : {
983 : dlist_iter iter;
984 : dsm_segment *seg;
985 :
986 0 : dlist_foreach(iter, &dsm_segment_list)
987 : {
988 0 : seg = dlist_container(dsm_segment, node, iter.cur);
989 0 : if (seg->handle == h)
990 0 : return seg;
991 : }
992 :
993 0 : return NULL;
994 : }
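/*
 * Illustrative sketch, not part of dsm.c: the find-or-attach pattern suggested
 * in the comments inside dsm_attach().  Attaching the same segment twice in
 * one backend is an error, so a caller that might already hold a mapping can
 * check for one first.  The name find_or_attach_area() is hypothetical.
 */
static dsm_segment *
find_or_attach_area(dsm_handle handle)
{
	dsm_segment *seg;

	seg = dsm_find_mapping(handle);
	if (seg == NULL)
		seg = dsm_attach(handle);

	return seg;
}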
995 :
996 : /*
997 : * Get the address at which a dynamic shared memory segment is mapped.
998 : */
999 : void *
1000 178 : dsm_segment_address(dsm_segment *seg)
1001 : {
1002 178 : Assert(seg->mapped_address != NULL);
1003 178 : return seg->mapped_address;
1004 : }
1005 :
1006 : /*
1007 : * Get the size of a mapping.
1008 : */
1009 : Size
1010 0 : dsm_segment_map_length(dsm_segment *seg)
1011 : {
1012 0 : Assert(seg->mapped_address != NULL);
1013 0 : return seg->mapped_size;
1014 : }
1015 :
1016 : /*
1017 : * Get a handle for a mapping.
1018 : *
1019 : * To establish communication via dynamic shared memory between two backends,
1020 : * one of them should first call dsm_create() to establish a new shared
1021 : * memory mapping. That process should then call dsm_segment_handle() to
1022 : * obtain a handle for the mapping, and pass that handle to the
1023 : * coordinating backend via some means (e.g. bgw_main_arg, or via the
1024 : * main shared memory segment). The recipient, once in possession of the
1025 : * handle, should call dsm_attach().
1026 : */
1027 : dsm_handle
1028 34 : dsm_segment_handle(dsm_segment *seg)
1029 : {
1030 34 : return seg->handle;
1031 : }
1032 :
1033 : /*
1034 : * Register an on-detach callback for a dynamic shared memory segment.
1035 : */
1036 : void
1037 715 : on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
1038 : {
1039 : dsm_segment_detach_callback *cb;
1040 :
1041 715 : cb = MemoryContextAlloc(TopMemoryContext,
1042 : sizeof(dsm_segment_detach_callback));
1043 715 : cb->function = function;
1044 715 : cb->arg = arg;
1045 715 : slist_push_head(&seg->on_detach, &cb->node);
1046 715 : }
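/*
 * Illustrative sketch, not part of dsm.c: registering backend-local cleanup to
 * run when this backend detaches from a segment.  dsm_detach() invokes the
 * callbacks in reverse order of registration, before the mapping is removed.
 * The names area_on_detach() and register_area_cleanup() are hypothetical.
 */
static void
area_on_detach(dsm_segment *seg, Datum arg)
{
	elog(DEBUG1, "detaching from dynamic shared memory segment %u",
		 dsm_segment_handle(seg));
}

static void
register_area_cleanup(dsm_segment *seg)
{
	on_dsm_detach(seg, area_on_detach, (Datum) 0);
}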
1047 :
1048 : /*
1049 : * Unregister an on-detach callback for a dynamic shared memory segment.
1050 : */
1051 : void
1052 347 : cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
1053 : Datum arg)
1054 : {
1055 : slist_mutable_iter iter;
1056 :
1057 1145 : slist_foreach_modify(iter, &seg->on_detach)
1058 : {
1059 : dsm_segment_detach_callback *cb;
1060 :
1061 1145 : cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
1062 1145 : if (cb->function == function && cb->arg == arg)
1063 : {
1064 347 : slist_delete_current(&iter);
1065 347 : pfree(cb);
1066 347 : break;
1067 : }
1068 : }
1069 347 : }
1070 :
1071 : /*
1072 : * Discard all registered on-detach callbacks without executing them.
1073 : */
1074 : void
1075 341 : reset_on_dsm_detach(void)
1076 : {
1077 : dlist_iter iter;
1078 :
1079 341 : dlist_foreach(iter, &dsm_segment_list)
1080 : {
1081 0 : dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1082 :
1083 : /* Throw away explicit on-detach actions one by one. */
1084 0 : while (!slist_is_empty(&seg->on_detach))
1085 : {
1086 : slist_node *node;
1087 : dsm_segment_detach_callback *cb;
1088 :
1089 0 : node = slist_pop_head_node(&seg->on_detach);
1090 0 : cb = slist_container(dsm_segment_detach_callback, node, node);
1091 0 : pfree(cb);
1092 : }
1093 :
1094 : /*
1095 : * Decrementing the reference count is a sort of implicit on-detach
1096 : * action; make sure we don't do that, either.
1097 : */
1098 0 : seg->control_slot = INVALID_CONTROL_SLOT;
1099 : }
1100 341 : }
1101 :
1102 : /*
1103 : * Create a segment descriptor.
1104 : */
1105 : static dsm_segment *
1106 178 : dsm_create_descriptor(void)
1107 : {
1108 : dsm_segment *seg;
1109 :
1110 178 : if (CurrentResourceOwner)
1111 178 : ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
1112 :
1113 178 : seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
1114 178 : dlist_push_head(&dsm_segment_list, &seg->node);
1115 :
1116 : /* seg->handle must be initialized by the caller */
1117 178 : seg->control_slot = INVALID_CONTROL_SLOT;
1118 178 : seg->impl_private = NULL;
1119 178 : seg->mapped_address = NULL;
1120 178 : seg->mapped_size = 0;
1121 :
1122 178 : seg->resowner = CurrentResourceOwner;
1123 178 : if (CurrentResourceOwner)
1124 178 : ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
1125 :
1126 178 : slist_init(&seg->on_detach);
1127 :
1128 178 : return seg;
1129 : }
1130 :
1131 : /*
1132 : * Sanity check a control segment.
1133 : *
1134 : * The goal here isn't to detect everything that could possibly be wrong with
1135 : * the control segment; there's not enough information for that. Rather, the
1136 : * goal is to make sure that someone can iterate over the items in the segment
1137 : * without overrunning the end of the mapping and crashing. We also check
1138 : * the magic number since, if that's messed up, this may not even be one of
1139 : * our segments at all.
1140 : */
1141 : static bool
1142 3 : dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
1143 : {
1144 3 : if (mapped_size < offsetof(dsm_control_header, item))
1145 0 : return false; /* Mapped size too short to read header. */
1146 3 : if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
1147 0 : return false; /* Magic number doesn't match. */
1148 3 : if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
1149 0 : return false; /* Max item count won't fit in map. */
1150 3 : if (control->nitems > control->maxitems)
1151 0 : return false; /* Overfull. */
1152 3 : return true;
1153 : }
1154 :
1155 : /*
1156 : * Compute the number of control-segment bytes needed to store a given
1157 : * number of items.
1158 : */
1159 : static uint64
1160 6 : dsm_control_bytes_needed(uint32 nitems)
1161 : {
1162 6 : return offsetof(dsm_control_header, item)
1163 6 : + sizeof(dsm_control_item) * (uint64) nitems;
1164 : }
|