Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dsm_impl.c
4 : * manage dynamic shared memory segments
5 : *
6 : * This file provides low-level APIs for creating and destroying shared
7 : * memory segments using several different possible techniques. We refer
8 : * to these segments as dynamic because they can be created, altered, and
9 : * destroyed at any point during the server life cycle. This is unlike
10 : * the main shared memory segment, of which there is always exactly one
11 : * and which is always mapped at a fixed address in every PostgreSQL
12 : * background process.
13 : *
14 : * Because not all systems provide the same primitives in this area, nor
15 : * do all primitives behave the same way on all systems, we provide
16 : * several implementations of this facility. Many systems implement
17 : * POSIX shared memory (shm_open etc.), which is well-suited to our needs
18 : * in this area, with the exception that shared memory identifiers live
19 : * in a flat system-wide namespace, raising the uncomfortable prospect of
20 : * name collisions with other processes (including other copies of
21 : * PostgreSQL) running on the same system. Some systems only support
22 : * the older System V shared memory interface (shmget etc.) which is
23 : * also usable; however, the default allocation limits are often quite
24 : * small, and the namespace is even more restricted.
25 : *
26 : * We also provide an mmap-based shared memory implementation. This may
27 : * be useful on systems that provide shared memory via a special-purpose
28 : * filesystem; by opting for this implementation, the user can even
29 : * control precisely where their shared memory segments are placed. It
30 : * can also be used as a fallback for systems where shm_open and shmget
31 : * are not available or can't be used for some reason. Of course,
32 : * mapping a file residing on an actual spinning disk is a fairly poor
33 : * approximation for shared memory because writeback may hurt performance
34 : * substantially, but there should be few systems where we must make do
35 : * with such poor tools.
36 : *
37 : * As ever, Windows requires its own implementation.
38 : *
39 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
40 : * Portions Copyright (c) 1994, Regents of the University of California
41 : *
42 : *
43 : * IDENTIFICATION
44 : * src/backend/storage/ipc/dsm_impl.c
45 : *
46 : *-------------------------------------------------------------------------
47 : */
48 :
49 : #include "postgres.h"
50 :
51 : #include <fcntl.h>
52 : #include <unistd.h>
53 : #ifndef WIN32
54 : #include <sys/mman.h>
55 : #endif
56 : #include <sys/stat.h>
57 : #ifdef HAVE_SYS_IPC_H
58 : #include <sys/ipc.h>
59 : #endif
60 : #ifdef HAVE_SYS_SHM_H
61 : #include <sys/shm.h>
62 : #endif
63 : #include "pgstat.h"
64 :
65 : #include "portability/mem.h"
66 : #include "storage/dsm_impl.h"
67 : #include "storage/fd.h"
68 : #include "utils/guc.h"
69 : #include "utils/memutils.h"
70 : #include "postmaster/postmaster.h"
71 :
72 : #ifdef USE_DSM_POSIX
73 : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
74 : void **impl_private, void **mapped_address,
75 : Size *mapped_size, int elevel);
76 : #endif
77 : #ifdef USE_DSM_SYSV
78 : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
79 : void **impl_private, void **mapped_address,
80 : Size *mapped_size, int elevel);
81 : #endif
82 : #ifdef USE_DSM_WINDOWS
83 : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
84 : void **impl_private, void **mapped_address,
85 : Size *mapped_size, int elevel);
86 : #endif
87 : #ifdef USE_DSM_MMAP
88 : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
89 : void **impl_private, void **mapped_address,
90 : Size *mapped_size, int elevel);
91 : #endif
92 : static int errcode_for_dynamic_shared_memory(void);
93 :
94 : const struct config_enum_entry dynamic_shared_memory_options[] = {
95 : #ifdef USE_DSM_POSIX
96 : {"posix", DSM_IMPL_POSIX, false},
97 : #endif
98 : #ifdef USE_DSM_SYSV
99 : {"sysv", DSM_IMPL_SYSV, false},
100 : #endif
101 : #ifdef USE_DSM_WINDOWS
102 : {"windows", DSM_IMPL_WINDOWS, false},
103 : #endif
104 : #ifdef USE_DSM_MMAP
105 : {"mmap", DSM_IMPL_MMAP, false},
106 : #endif
107 : {"none", DSM_IMPL_NONE, false},
108 : {NULL, 0, false}
109 : };
110 :
/*
 * Currently selected implementation (one of the DSM_IMPL_* values); every
 * entry point in this file dispatches on it.
 */
int			dynamic_shared_memory_type;

/* Number of zero bytes written per write() call when zero-filling a file. */
#define ZBUFFER_SIZE				8192

/* Prefix of the names given to Windows file mapping objects. */
#define SEGMENT_NAME_PREFIX			"Global/PostgreSQL"
118 :
119 : /*------
120 : * Perform a low-level shared memory operation in a platform-specific way,
121 : * as dictated by the selected implementation. Each implementation is
122 : * required to implement the following primitives.
123 : *
124 : * DSM_OP_CREATE. Create a segment whose size is the request_size and
125 : * map it.
126 : *
127 : * DSM_OP_ATTACH. Map the segment, whose size must be the request_size.
128 : * The segment may already be mapped; any existing mapping should be removed
129 : * before creating a new one.
130 : *
131 : * DSM_OP_DETACH. Unmap the segment.
132 : *
133 : * DSM_OP_RESIZE. Resize the segment to the given request_size and
134 : * remap the segment at that new size.
135 : *
136 : * DSM_OP_DESTROY. Unmap the segment, if it is mapped. Destroy the
137 : * segment.
138 : *
139 : * Arguments:
140 : * op: The operation to be performed.
141 : * handle: The handle of an existing object, or for DSM_OP_CREATE, the
142 : * a new handle the caller wants created.
143 : * request_size: For DSM_OP_CREATE, the requested size. For DSM_OP_RESIZE,
144 : * the new size. Otherwise, 0.
145 : * impl_private: Private, implementation-specific data. Will be a pointer
146 : * to NULL for the first operation on a shared memory segment within this
147 : * backend; thereafter, it will point to the value to which it was set
148 : * on the previous call.
149 : * mapped_address: Pointer to start of current mapping; pointer to NULL
150 : * if none. Updated with new mapping address.
151 : * mapped_size: Pointer to size of current mapping; pointer to 0 if none.
152 : * Updated with new mapped size.
153 : * elevel: Level at which to log errors.
154 : *
155 : * Return value: true on success, false on failure. When false is returned,
156 : * a message should first be logged at the specified elevel, except in the
157 : * case where DSM_OP_CREATE experiences a name collision, which should
158 : * silently return false.
159 : *-----
160 : */
161 : bool
162 382 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
163 : void **impl_private, void **mapped_address, Size *mapped_size,
164 : int elevel)
165 : {
166 382 : Assert(op == DSM_OP_CREATE || op == DSM_OP_RESIZE || request_size == 0);
167 382 : Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
168 : (*mapped_address == NULL && *mapped_size == 0));
169 :
170 382 : switch (dynamic_shared_memory_type)
171 : {
172 : #ifdef USE_DSM_POSIX
173 : case DSM_IMPL_POSIX:
174 382 : return dsm_impl_posix(op, handle, request_size, impl_private,
175 : mapped_address, mapped_size, elevel);
176 : #endif
177 : #ifdef USE_DSM_SYSV
178 : case DSM_IMPL_SYSV:
179 0 : return dsm_impl_sysv(op, handle, request_size, impl_private,
180 : mapped_address, mapped_size, elevel);
181 : #endif
182 : #ifdef USE_DSM_WINDOWS
183 : case DSM_IMPL_WINDOWS:
184 : return dsm_impl_windows(op, handle, request_size, impl_private,
185 : mapped_address, mapped_size, elevel);
186 : #endif
187 : #ifdef USE_DSM_MMAP
188 : case DSM_IMPL_MMAP:
189 0 : return dsm_impl_mmap(op, handle, request_size, impl_private,
190 : mapped_address, mapped_size, elevel);
191 : #endif
192 : default:
193 0 : elog(ERROR, "unexpected dynamic shared memory type: %d",
194 : dynamic_shared_memory_type);
195 : return false;
196 : }
197 : }
198 :
199 : /*
200 : * Does the current dynamic shared memory implementation support resizing
201 : * segments? (The answer here could be platform-dependent in the future,
202 : * since AIX allows shmctl(shmid, SHM_RESIZE, &buffer), though you apparently
203 : * can't resize segments to anything larger than 256MB that way. For now,
204 : * we keep it simple.)
205 : */
206 : bool
207 0 : dsm_impl_can_resize(void)
208 : {
209 0 : switch (dynamic_shared_memory_type)
210 : {
211 : case DSM_IMPL_NONE:
212 0 : return false;
213 : case DSM_IMPL_POSIX:
214 0 : return true;
215 : case DSM_IMPL_SYSV:
216 0 : return false;
217 : case DSM_IMPL_WINDOWS:
218 0 : return false;
219 : case DSM_IMPL_MMAP:
220 0 : return true;
221 : default:
222 0 : return false; /* should not happen */
223 : }
224 : }
225 :
226 : #ifdef USE_DSM_POSIX
227 : /*
228 : * Operating system primitives to support POSIX shared memory.
229 : *
230 : * POSIX shared memory segments are created and attached using shm_open()
231 : * and shm_unlink(); other operations, such as sizing or mapping the
232 : * segment, are performed as if the shared memory segments were files.
233 : *
234 : * Indeed, on some platforms, they may be implemented that way. While
235 : * POSIX shared memory segments seem intended to exist in a flat namespace,
236 : * some operating systems may implement them as files, even going so far
237 : * to treat a request for /xyz as a request to create a file by that name
238 : * in the root directory. Users of such broken platforms should select
239 : * a different shared memory implementation.
240 : */
241 : static bool
242 382 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
243 : void **impl_private, void **mapped_address, Size *mapped_size,
244 : int elevel)
245 : {
246 : char name[64];
247 : int flags;
248 : int fd;
249 : char *address;
250 :
251 382 : snprintf(name, 64, "/PostgreSQL.%u", handle);
252 :
253 : /* Handle teardown cases. */
254 382 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
255 : {
256 201 : if (*mapped_address != NULL
257 182 : && munmap(*mapped_address, *mapped_size) != 0)
258 : {
259 0 : ereport(elevel,
260 : (errcode_for_dynamic_shared_memory(),
261 : errmsg("could not unmap shared memory segment \"%s\": %m",
262 : name)));
263 0 : return false;
264 : }
265 201 : *mapped_address = NULL;
266 201 : *mapped_size = 0;
267 201 : if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
268 : {
269 0 : ereport(elevel,
270 : (errcode_for_dynamic_shared_memory(),
271 : errmsg("could not remove shared memory segment \"%s\": %m",
272 : name)));
273 0 : return false;
274 : }
275 201 : return true;
276 : }
277 :
278 : /*
279 : * Create new segment or open an existing one for attach or resize.
280 : *
281 : * Even though we're not going through fd.c, we should be safe against
282 : * running out of file descriptors, because of NUM_RESERVED_FDS. We're
283 : * only opening one extra descriptor here, and we'll close it before
284 : * returning.
285 : */
286 181 : flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
287 181 : if ((fd = shm_open(name, flags, 0600)) == -1)
288 : {
289 0 : if (errno != EEXIST)
290 0 : ereport(elevel,
291 : (errcode_for_dynamic_shared_memory(),
292 : errmsg("could not open shared memory segment \"%s\": %m",
293 : name)));
294 0 : return false;
295 : }
296 :
297 : /*
298 : * If we're attaching the segment, determine the current size; if we are
299 : * creating or resizing the segment, set the size to the requested value.
300 : */
301 181 : if (op == DSM_OP_ATTACH)
302 : {
303 : struct stat st;
304 :
305 159 : if (fstat(fd, &st) != 0)
306 : {
307 : int save_errno;
308 :
309 : /* Back out what's already been done. */
310 0 : save_errno = errno;
311 0 : close(fd);
312 0 : errno = save_errno;
313 :
314 0 : ereport(elevel,
315 : (errcode_for_dynamic_shared_memory(),
316 : errmsg("could not stat shared memory segment \"%s\": %m",
317 : name)));
318 0 : return false;
319 : }
320 159 : request_size = st.st_size;
321 : }
322 22 : else if (*mapped_size != request_size && ftruncate(fd, request_size))
323 : {
324 : int save_errno;
325 :
326 : /* Back out what's already been done. */
327 0 : save_errno = errno;
328 0 : close(fd);
329 0 : if (op == DSM_OP_CREATE)
330 0 : shm_unlink(name);
331 0 : errno = save_errno;
332 :
333 0 : ereport(elevel,
334 : (errcode_for_dynamic_shared_memory(),
335 : errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
336 : name, request_size)));
337 0 : return false;
338 : }
339 :
340 : /*
341 : * If we're reattaching or resizing, we must remove any existing mapping,
342 : * unless we've already got the right thing mapped.
343 : */
344 181 : if (*mapped_address != NULL)
345 : {
346 0 : if (*mapped_size == request_size)
347 0 : return true;
348 0 : if (munmap(*mapped_address, *mapped_size) != 0)
349 : {
350 : int save_errno;
351 :
352 : /* Back out what's already been done. */
353 0 : save_errno = errno;
354 0 : close(fd);
355 0 : if (op == DSM_OP_CREATE)
356 0 : shm_unlink(name);
357 0 : errno = save_errno;
358 :
359 0 : ereport(elevel,
360 : (errcode_for_dynamic_shared_memory(),
361 : errmsg("could not unmap shared memory segment \"%s\": %m",
362 : name)));
363 0 : return false;
364 : }
365 0 : *mapped_address = NULL;
366 0 : *mapped_size = 0;
367 : }
368 :
369 : /* Map it. */
370 181 : address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
371 : MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
372 181 : if (address == MAP_FAILED)
373 : {
374 : int save_errno;
375 :
376 : /* Back out what's already been done. */
377 0 : save_errno = errno;
378 0 : close(fd);
379 0 : if (op == DSM_OP_CREATE)
380 0 : shm_unlink(name);
381 0 : errno = save_errno;
382 :
383 0 : ereport(elevel,
384 : (errcode_for_dynamic_shared_memory(),
385 : errmsg("could not map shared memory segment \"%s\": %m",
386 : name)));
387 0 : return false;
388 : }
389 181 : *mapped_address = address;
390 181 : *mapped_size = request_size;
391 181 : close(fd);
392 :
393 181 : return true;
394 : }
395 : #endif
396 :
397 : #ifdef USE_DSM_SYSV
398 : /*
399 : * Operating system primitives to support System V shared memory.
400 : *
401 : * System V shared memory segments are manipulated using shmget(), shmat(),
402 : * shmdt(), and shmctl(). There's no portable way to resize such
403 : * segments. As the default allocation limits for System V shared memory
404 : * are usually quite low, the POSIX facilities may be preferable; but
405 : * those are not supported everywhere.
406 : */
407 : static bool
408 0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
409 : void **impl_private, void **mapped_address, Size *mapped_size,
410 : int elevel)
411 : {
412 : key_t key;
413 : int ident;
414 : char *address;
415 : char name[64];
416 : int *ident_cache;
417 :
418 : /* Resize is not supported for System V shared memory. */
419 0 : if (op == DSM_OP_RESIZE)
420 : {
421 0 : elog(elevel, "System V shared memory segments cannot be resized");
422 0 : return false;
423 : }
424 :
425 : /* Since resize isn't supported, reattach is a no-op. */
426 0 : if (op == DSM_OP_ATTACH && *mapped_address != NULL)
427 0 : return true;
428 :
429 : /*
430 : * POSIX shared memory and mmap-based shared memory identify segments with
431 : * names. To avoid needless error message variation, we use the handle as
432 : * the name.
433 : */
434 0 : snprintf(name, 64, "%u", handle);
435 :
436 : /*
437 : * The System V shared memory namespace is very restricted; names are of
438 : * type key_t, which is expected to be some sort of integer data type, but
439 : * not necessarily the same one as dsm_handle. Since we use dsm_handle to
440 : * identify shared memory segments across processes, this might seem like
441 : * a problem, but it's really not. If dsm_handle is bigger than key_t,
442 : * the cast below might truncate away some bits from the handle the
443 : * user-provided, but it'll truncate exactly the same bits away in exactly
444 : * the same fashion every time we use that handle, which is all that
445 : * really matters. Conversely, if dsm_handle is smaller than key_t, we
446 : * won't use the full range of available key space, but that's no big deal
447 : * either.
448 : *
449 : * We do make sure that the key isn't negative, because that might not be
450 : * portable.
451 : */
452 0 : key = (key_t) handle;
453 0 : if (key < 1) /* avoid compiler warning if type is unsigned */
454 0 : key = -key;
455 :
456 : /*
457 : * There's one special key, IPC_PRIVATE, which can't be used. If we end
458 : * up with that value by chance during a create operation, just pretend it
459 : * already exists, so that caller will retry. If we run into it anywhere
460 : * else, the caller has passed a handle that doesn't correspond to
461 : * anything we ever created, which should not happen.
462 : */
463 0 : if (key == IPC_PRIVATE)
464 : {
465 0 : if (op != DSM_OP_CREATE)
466 0 : elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
467 0 : errno = EEXIST;
468 0 : return false;
469 : }
470 :
471 : /*
472 : * Before we can do anything with a shared memory segment, we have to map
473 : * the shared memory key to a shared memory identifier using shmget(). To
474 : * avoid repeated lookups, we store the key using impl_private.
475 : */
476 0 : if (*impl_private != NULL)
477 : {
478 0 : ident_cache = *impl_private;
479 0 : ident = *ident_cache;
480 : }
481 : else
482 : {
483 0 : int flags = IPCProtection;
484 : size_t segsize;
485 :
486 : /*
487 : * Allocate the memory BEFORE acquiring the resource, so that we don't
488 : * leak the resource if memory allocation fails.
489 : */
490 0 : ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
491 :
492 : /*
493 : * When using shmget to find an existing segment, we must pass the
494 : * size as 0. Passing a non-zero size which is greater than the
495 : * actual size will result in EINVAL.
496 : */
497 0 : segsize = 0;
498 :
499 0 : if (op == DSM_OP_CREATE)
500 : {
501 0 : flags |= IPC_CREAT | IPC_EXCL;
502 0 : segsize = request_size;
503 : }
504 :
505 0 : if ((ident = shmget(key, segsize, flags)) == -1)
506 : {
507 0 : if (errno != EEXIST)
508 : {
509 0 : int save_errno = errno;
510 :
511 0 : pfree(ident_cache);
512 0 : errno = save_errno;
513 0 : ereport(elevel,
514 : (errcode_for_dynamic_shared_memory(),
515 : errmsg("could not get shared memory segment: %m")));
516 : }
517 0 : return false;
518 : }
519 :
520 0 : *ident_cache = ident;
521 0 : *impl_private = ident_cache;
522 : }
523 :
524 : /* Handle teardown cases. */
525 0 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
526 : {
527 0 : pfree(ident_cache);
528 0 : *impl_private = NULL;
529 0 : if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
530 : {
531 0 : ereport(elevel,
532 : (errcode_for_dynamic_shared_memory(),
533 : errmsg("could not unmap shared memory segment \"%s\": %m",
534 : name)));
535 0 : return false;
536 : }
537 0 : *mapped_address = NULL;
538 0 : *mapped_size = 0;
539 0 : if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
540 : {
541 0 : ereport(elevel,
542 : (errcode_for_dynamic_shared_memory(),
543 : errmsg("could not remove shared memory segment \"%s\": %m",
544 : name)));
545 0 : return false;
546 : }
547 0 : return true;
548 : }
549 :
550 : /* If we're attaching it, we must use IPC_STAT to determine the size. */
551 0 : if (op == DSM_OP_ATTACH)
552 : {
553 : struct shmid_ds shm;
554 :
555 0 : if (shmctl(ident, IPC_STAT, &shm) != 0)
556 : {
557 0 : ereport(elevel,
558 : (errcode_for_dynamic_shared_memory(),
559 : errmsg("could not stat shared memory segment \"%s\": %m",
560 : name)));
561 0 : return false;
562 : }
563 0 : request_size = shm.shm_segsz;
564 : }
565 :
566 : /* Map it. */
567 0 : address = shmat(ident, NULL, PG_SHMAT_FLAGS);
568 0 : if (address == (void *) -1)
569 : {
570 : int save_errno;
571 :
572 : /* Back out what's already been done. */
573 0 : save_errno = errno;
574 0 : if (op == DSM_OP_CREATE)
575 0 : shmctl(ident, IPC_RMID, NULL);
576 0 : errno = save_errno;
577 :
578 0 : ereport(elevel,
579 : (errcode_for_dynamic_shared_memory(),
580 : errmsg("could not map shared memory segment \"%s\": %m",
581 : name)));
582 0 : return false;
583 : }
584 0 : *mapped_address = address;
585 0 : *mapped_size = request_size;
586 :
587 0 : return true;
588 : }
589 : #endif
590 :
591 : #ifdef USE_DSM_WINDOWS
592 : /*
593 : * Operating system primitives to support Windows shared memory.
594 : *
595 : * Windows shared memory implementation is done using file mapping
596 : * which can be backed by either physical file or system paging file.
597 : * Current implementation uses system paging file as other effects
598 : * like performance are not clear for physical file and it is used in similar
599 : * way for main shared memory in windows.
600 : *
601 : * A memory mapping object is a kernel object - they always get deleted when
602 : * the last reference to them goes away, either explicitly via a CloseHandle or
603 : * when the process containing the reference exits.
604 : */
605 : static bool
606 : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
607 : void **impl_private, void **mapped_address,
608 : Size *mapped_size, int elevel)
609 : {
610 : char *address;
611 : HANDLE hmap;
612 : char name[64];
613 : MEMORY_BASIC_INFORMATION info;
614 :
615 : /* Resize is not supported for Windows shared memory. */
616 : if (op == DSM_OP_RESIZE)
617 : {
618 : elog(elevel, "Windows shared memory segments cannot be resized");
619 : return false;
620 : }
621 :
622 : /* Since resize isn't supported, reattach is a no-op. */
623 : if (op == DSM_OP_ATTACH && *mapped_address != NULL)
624 : return true;
625 :
626 : /*
627 : * Storing the shared memory segment in the Global\ namespace, can allow
628 : * any process running in any session to access that file mapping object
629 : * provided that the caller has the required access rights. But to avoid
630 : * issues faced in main shared memory, we are using the naming convention
631 : * similar to main shared memory. We can change here once issue mentioned
632 : * in GetSharedMemName is resolved.
633 : */
634 : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
635 :
636 : /*
637 : * Handle teardown cases. Since Windows automatically destroys the object
638 : * when no references reamin, we can treat it the same as detach.
639 : */
640 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
641 : {
642 : if (*mapped_address != NULL
643 : && UnmapViewOfFile(*mapped_address) == 0)
644 : {
645 : _dosmaperr(GetLastError());
646 : ereport(elevel,
647 : (errcode_for_dynamic_shared_memory(),
648 : errmsg("could not unmap shared memory segment \"%s\": %m",
649 : name)));
650 : return false;
651 : }
652 : if (*impl_private != NULL
653 : && CloseHandle(*impl_private) == 0)
654 : {
655 : _dosmaperr(GetLastError());
656 : ereport(elevel,
657 : (errcode_for_dynamic_shared_memory(),
658 : errmsg("could not remove shared memory segment \"%s\": %m",
659 : name)));
660 : return false;
661 : }
662 :
663 : *impl_private = NULL;
664 : *mapped_address = NULL;
665 : *mapped_size = 0;
666 : return true;
667 : }
668 :
669 : /* Create new segment or open an existing one for attach. */
670 : if (op == DSM_OP_CREATE)
671 : {
672 : DWORD size_high;
673 : DWORD size_low;
674 : DWORD errcode;
675 :
676 : /* Shifts >= the width of the type are undefined. */
677 : #ifdef _WIN64
678 : size_high = request_size >> 32;
679 : #else
680 : size_high = 0;
681 : #endif
682 : size_low = (DWORD) request_size;
683 :
684 : /* CreateFileMapping might not clear the error code on success */
685 : SetLastError(0);
686 :
687 : hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
688 : NULL, /* Default security attrs */
689 : PAGE_READWRITE, /* Memory is read/write */
690 : size_high, /* Upper 32 bits of size */
691 : size_low, /* Lower 32 bits of size */
692 : name);
693 :
694 : errcode = GetLastError();
695 : if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
696 : {
697 : /*
698 : * On Windows, when the segment already exists, a handle for the
699 : * existing segment is returned. We must close it before
700 : * returning. However, if the existing segment is created by a
701 : * service, then it returns ERROR_ACCESS_DENIED. We don't do
702 : * _dosmaperr here, so errno won't be modified.
703 : */
704 : if (hmap)
705 : CloseHandle(hmap);
706 : return false;
707 : }
708 :
709 : if (!hmap)
710 : {
711 : _dosmaperr(errcode);
712 : ereport(elevel,
713 : (errcode_for_dynamic_shared_memory(),
714 : errmsg("could not create shared memory segment \"%s\": %m",
715 : name)));
716 : return false;
717 : }
718 : }
719 : else
720 : {
721 : hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
722 : FALSE, /* do not inherit the name */
723 : name); /* name of mapping object */
724 : if (!hmap)
725 : {
726 : _dosmaperr(GetLastError());
727 : ereport(elevel,
728 : (errcode_for_dynamic_shared_memory(),
729 : errmsg("could not open shared memory segment \"%s\": %m",
730 : name)));
731 : return false;
732 : }
733 : }
734 :
735 : /* Map it. */
736 : address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
737 : 0, 0, 0);
738 : if (!address)
739 : {
740 : int save_errno;
741 :
742 : _dosmaperr(GetLastError());
743 : /* Back out what's already been done. */
744 : save_errno = errno;
745 : CloseHandle(hmap);
746 : errno = save_errno;
747 :
748 : ereport(elevel,
749 : (errcode_for_dynamic_shared_memory(),
750 : errmsg("could not map shared memory segment \"%s\": %m",
751 : name)));
752 : return false;
753 : }
754 :
755 : /*
756 : * VirtualQuery gives size in page_size units, which is 4K for Windows. We
757 : * need size only when we are attaching, but it's better to get the size
758 : * when creating new segment to keep size consistent both for
759 : * DSM_OP_CREATE and DSM_OP_ATTACH.
760 : */
761 : if (VirtualQuery(address, &info, sizeof(info)) == 0)
762 : {
763 : int save_errno;
764 :
765 : _dosmaperr(GetLastError());
766 : /* Back out what's already been done. */
767 : save_errno = errno;
768 : UnmapViewOfFile(address);
769 : CloseHandle(hmap);
770 : errno = save_errno;
771 :
772 : ereport(elevel,
773 : (errcode_for_dynamic_shared_memory(),
774 : errmsg("could not stat shared memory segment \"%s\": %m",
775 : name)));
776 : return false;
777 : }
778 :
779 : *mapped_address = address;
780 : *mapped_size = info.RegionSize;
781 : *impl_private = hmap;
782 :
783 : return true;
784 : }
785 : #endif
786 :
787 : #ifdef USE_DSM_MMAP
788 : /*
789 : * Operating system primitives to support mmap-based shared memory.
790 : *
791 : * Calling this "shared memory" is somewhat of a misnomer, because what
792 : * we're really doing is creating a bunch of files and mapping them into
793 : * our address space. The operating system may feel obliged to
794 : * synchronize the contents to disk even if nothing is being paged out,
795 : * which will not serve us well. The user can relocate the pg_dynshmem
796 : * directory to a ramdisk to avoid this problem, if available.
797 : */
798 : static bool
799 0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
800 : void **impl_private, void **mapped_address, Size *mapped_size,
801 : int elevel)
802 : {
803 : char name[64];
804 : int flags;
805 : int fd;
806 : char *address;
807 :
808 0 : snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
809 : handle);
810 :
811 : /* Handle teardown cases. */
812 0 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
813 : {
814 0 : if (*mapped_address != NULL
815 0 : && munmap(*mapped_address, *mapped_size) != 0)
816 : {
817 0 : ereport(elevel,
818 : (errcode_for_dynamic_shared_memory(),
819 : errmsg("could not unmap shared memory segment \"%s\": %m",
820 : name)));
821 0 : return false;
822 : }
823 0 : *mapped_address = NULL;
824 0 : *mapped_size = 0;
825 0 : if (op == DSM_OP_DESTROY && unlink(name) != 0)
826 : {
827 0 : ereport(elevel,
828 : (errcode_for_dynamic_shared_memory(),
829 : errmsg("could not remove shared memory segment \"%s\": %m",
830 : name)));
831 0 : return false;
832 : }
833 0 : return true;
834 : }
835 :
836 : /* Create new segment or open an existing one for attach or resize. */
837 0 : flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
838 0 : if ((fd = OpenTransientFile(name, flags, 0600)) == -1)
839 : {
840 0 : if (errno != EEXIST)
841 0 : ereport(elevel,
842 : (errcode_for_dynamic_shared_memory(),
843 : errmsg("could not open shared memory segment \"%s\": %m",
844 : name)));
845 0 : return false;
846 : }
847 :
848 : /*
849 : * If we're attaching the segment, determine the current size; if we are
850 : * creating or resizing the segment, set the size to the requested value.
851 : */
852 0 : if (op == DSM_OP_ATTACH)
853 : {
854 : struct stat st;
855 :
856 0 : if (fstat(fd, &st) != 0)
857 : {
858 : int save_errno;
859 :
860 : /* Back out what's already been done. */
861 0 : save_errno = errno;
862 0 : CloseTransientFile(fd);
863 0 : errno = save_errno;
864 :
865 0 : ereport(elevel,
866 : (errcode_for_dynamic_shared_memory(),
867 : errmsg("could not stat shared memory segment \"%s\": %m",
868 : name)));
869 0 : return false;
870 : }
871 0 : request_size = st.st_size;
872 : }
873 0 : else if (*mapped_size > request_size && ftruncate(fd, request_size))
874 : {
875 : int save_errno;
876 :
877 : /* Back out what's already been done. */
878 0 : save_errno = errno;
879 0 : close(fd);
880 0 : if (op == DSM_OP_CREATE)
881 0 : unlink(name);
882 0 : errno = save_errno;
883 :
884 0 : ereport(elevel,
885 : (errcode_for_dynamic_shared_memory(),
886 : errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
887 : name, request_size)));
888 0 : return false;
889 : }
890 0 : else if (*mapped_size < request_size)
891 : {
892 : /*
893 : * Allocate a buffer full of zeros.
894 : *
895 : * Note: palloc zbuffer, instead of just using a local char array, to
896 : * ensure it is reasonably well-aligned; this may save a few cycles
897 : * transferring data to the kernel.
898 : */
899 0 : char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
900 0 : uint32 remaining = request_size;
901 0 : bool success = true;
902 :
903 : /*
904 : * Zero-fill the file. We have to do this the hard way to ensure that
905 : * all the file space has really been allocated, so that we don't
906 : * later seg fault when accessing the memory mapping. This is pretty
907 : * pessimal.
908 : */
909 0 : while (success && remaining > 0)
910 : {
911 0 : Size goal = remaining;
912 :
913 0 : if (goal > ZBUFFER_SIZE)
914 0 : goal = ZBUFFER_SIZE;
915 0 : pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
916 0 : if (write(fd, zbuffer, goal) == goal)
917 0 : remaining -= goal;
918 : else
919 0 : success = false;
920 0 : pgstat_report_wait_end();
921 : }
922 :
923 0 : if (!success)
924 : {
925 : int save_errno;
926 :
927 : /* Back out what's already been done. */
928 0 : save_errno = errno;
929 0 : CloseTransientFile(fd);
930 0 : if (op == DSM_OP_CREATE)
931 0 : unlink(name);
932 0 : errno = save_errno ? save_errno : ENOSPC;
933 :
934 0 : ereport(elevel,
935 : (errcode_for_dynamic_shared_memory(),
936 : errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
937 : name, request_size)));
938 0 : return false;
939 : }
940 : }
941 :
942 : /*
943 : * If we're reattaching or resizing, we must remove any existing mapping,
944 : * unless we've already got the right thing mapped.
945 : */
946 0 : if (*mapped_address != NULL)
947 : {
948 0 : if (*mapped_size == request_size)
949 0 : return true;
950 0 : if (munmap(*mapped_address, *mapped_size) != 0)
951 : {
952 : int save_errno;
953 :
954 : /* Back out what's already been done. */
955 0 : save_errno = errno;
956 0 : CloseTransientFile(fd);
957 0 : if (op == DSM_OP_CREATE)
958 0 : unlink(name);
959 0 : errno = save_errno;
960 :
961 0 : ereport(elevel,
962 : (errcode_for_dynamic_shared_memory(),
963 : errmsg("could not unmap shared memory segment \"%s\": %m",
964 : name)));
965 0 : return false;
966 : }
967 0 : *mapped_address = NULL;
968 0 : *mapped_size = 0;
969 : }
970 :
971 : /* Map it. */
972 0 : address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
973 : MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
974 0 : if (address == MAP_FAILED)
975 : {
976 : int save_errno;
977 :
978 : /* Back out what's already been done. */
979 0 : save_errno = errno;
980 0 : CloseTransientFile(fd);
981 0 : if (op == DSM_OP_CREATE)
982 0 : unlink(name);
983 0 : errno = save_errno;
984 :
985 0 : ereport(elevel,
986 : (errcode_for_dynamic_shared_memory(),
987 : errmsg("could not map shared memory segment \"%s\": %m",
988 : name)));
989 0 : return false;
990 : }
991 0 : *mapped_address = address;
992 0 : *mapped_size = request_size;
993 0 : CloseTransientFile(fd);
994 :
995 0 : return true;
996 : }
997 : #endif
998 :
999 : /*
1000 : * Implementation-specific actions that must be performed when a segment is to
1001 : * be preserved even when no backend has it attached.
1002 : *
1003 : * Except on Windows, we don't need to do anything at all. But since Windows
1004 : * cleans up segments automatically when no references remain, we duplicate
1005 : * the segment handle into the postmaster process. The postmaster needn't
1006 : * do anything to receive the handle; Windows transfers it automatically.
1007 : */
1008 : void
1009 2 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
1010 : void **impl_private_pm_handle)
1011 : {
1012 2 : switch (dynamic_shared_memory_type)
1013 : {
1014 : #ifdef USE_DSM_WINDOWS
1015 : case DSM_IMPL_WINDOWS:
1016 : {
1017 : HANDLE hmap;
1018 :
1019 : if (!DuplicateHandle(GetCurrentProcess(), impl_private,
1020 : PostmasterHandle, &hmap, 0, FALSE,
1021 : DUPLICATE_SAME_ACCESS))
1022 : {
1023 : char name[64];
1024 :
1025 : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1026 : _dosmaperr(GetLastError());
1027 : ereport(ERROR,
1028 : (errcode_for_dynamic_shared_memory(),
1029 : errmsg("could not duplicate handle for \"%s\": %m",
1030 : name)));
1031 : }
1032 :
1033 : /*
1034 : * Here, we remember the handle that we created in the
1035 : * postmaster process. This handle isn't actually usable in
1036 : * any process other than the postmaster, but that doesn't
1037 : * matter. We're just holding onto it so that, if the segment
1038 : * is unpinned, dsm_impl_unpin_segment can close it.
1039 : */
1040 : *impl_private_pm_handle = hmap;
1041 : break;
1042 : }
1043 : #endif
1044 : default:
1045 2 : break;
1046 : }
1047 2 : }
1048 :
1049 : /*
1050 : * Implementation-specific actions that must be performed when a segment is no
1051 : * longer to be preserved, so that it will be cleaned up when all backends
1052 : * have detached from it.
1053 : *
1054 : * Except on Windows, we don't need to do anything at all. For Windows, we
1055 : * close the extra handle that dsm_impl_pin_segment created in the
1056 : * postmaster's process space.
1057 : */
1058 : void
1059 2 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
1060 : {
1061 2 : switch (dynamic_shared_memory_type)
1062 : {
1063 : #ifdef USE_DSM_WINDOWS
1064 : case DSM_IMPL_WINDOWS:
1065 : {
1066 : if (*impl_private &&
1067 : !DuplicateHandle(PostmasterHandle, *impl_private,
1068 : NULL, NULL, 0, FALSE,
1069 : DUPLICATE_CLOSE_SOURCE))
1070 : {
1071 : char name[64];
1072 :
1073 : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1074 : _dosmaperr(GetLastError());
1075 : ereport(ERROR,
1076 : (errcode_for_dynamic_shared_memory(),
1077 : errmsg("could not duplicate handle for \"%s\": %m",
1078 : name)));
1079 : }
1080 :
1081 : *impl_private = NULL;
1082 : break;
1083 : }
1084 : #endif
1085 : default:
1086 2 : break;
1087 : }
1088 2 : }
1089 :
1090 : static int
1091 0 : errcode_for_dynamic_shared_memory(void)
1092 : {
1093 0 : if (errno == EFBIG || errno == ENOMEM)
1094 0 : return errcode(ERRCODE_OUT_OF_MEMORY);
1095 : else
1096 0 : return errcode_for_file_access();
1097 : }
|