Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * sysv_shmem.c
4 : * Implement shared memory using SysV facilities
5 : *
6 : * These routines used to be a fairly thin layer on top of SysV shared
7 : * memory functionality. With the addition of anonymous-shmem logic,
8 : * they're a bit fatter now. We still require a SysV shmem block to
9 : * exist, though, because mmap'd shmem provides no way to find out how
10 : * many processes are attached, which we need for interlocking purposes.
11 : *
12 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
13 : * Portions Copyright (c) 1994, Regents of the University of California
14 : *
15 : * IDENTIFICATION
16 : * src/backend/port/sysv_shmem.c
17 : *
18 : *-------------------------------------------------------------------------
19 : */
20 : #include "postgres.h"
21 :
22 : #include <signal.h>
23 : #include <unistd.h>
24 : #include <sys/file.h>
25 : #include <sys/mman.h>
26 : #include <sys/stat.h>
27 : #ifdef HAVE_SYS_IPC_H
28 : #include <sys/ipc.h>
29 : #endif
30 : #ifdef HAVE_SYS_SHM_H
31 : #include <sys/shm.h>
32 : #endif
33 :
34 : #include "miscadmin.h"
35 : #include "portability/mem.h"
36 : #include "storage/dsm.h"
37 : #include "storage/fd.h"
38 : #include "storage/ipc.h"
39 : #include "storage/pg_shmem.h"
40 : #include "utils/guc.h"
41 : #include "utils/pidfile.h"
42 :
43 :
44 : /*
45 : * As of PostgreSQL 9.3, we normally allocate only a very small amount of
46 : * System V shared memory, and only for the purposes of providing an
47 : * interlock to protect the data directory. The real shared memory block
48 : * is allocated using mmap(). This works around the problem that many
49 : * systems have very low limits on the amount of System V shared memory
50 : * that can be allocated. Even a limit of a few megabytes will be enough
51 : * to run many copies of PostgreSQL without needing to adjust system settings.
52 : *
53 : * We assume that no one will attempt to run PostgreSQL 9.3 or later on
54 : * systems that are ancient enough that anonymous shared memory is not
55 : * supported, such as pre-2.4 versions of Linux. If that turns out to be
56 : * false, we might need to add compile and/or run-time tests here and do this
57 : * only if the running kernel supports it.
58 : *
59 : * However, we must always disable this logic in the EXEC_BACKEND case, and
60 : * fall back to the old method of allocating the entire segment using System V
61 : * shared memory, because there's no way to attach an anonymous mmap'd segment
62 : * to a process after exec(). Since EXEC_BACKEND is intended only for
63 : * developer use, this shouldn't be a big problem. Because of this, we do
64 : * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
65 : */
66 : #ifndef EXEC_BACKEND
67 : #define USE_ANONYMOUS_SHMEM
68 : #endif
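/*
 * Illustrative sketch, not part of the PostgreSQL sources: the reason a SysV
 * segment is kept at all is that shmctl(IPC_STAT) reports how many processes
 * are attached (shm_nattch), which an anonymous mmap() region cannot tell us.
 * A minimal standalone demonstration, assuming an arbitrary key and a
 * throwaway 4 kB segment:
 */
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int
main(void)
{
    /* The key 0x520000 is arbitrary, chosen only for this example. */
    int         shmid = shmget((key_t) 0x520000, 4096, IPC_CREAT | 0600);
    struct shmid_ds ds;
    void       *addr;

    if (shmid < 0)
        return 1;
    addr = shmat(shmid, NULL, 0);       /* attach, so the count becomes 1 */
    if (shmctl(shmid, IPC_STAT, &ds) == 0)
        printf("attached processes: %lu\n", (unsigned long) ds.shm_nattch);
    if (addr != (void *) -1)
        shmdt(addr);
    shmctl(shmid, IPC_RMID, NULL);      /* discard the throwaway segment */
    return 0;
}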
69 :
70 :
71 : typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
72 : typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
73 :
74 :
75 : unsigned long UsedShmemSegID = 0;
76 : void *UsedShmemSegAddr = NULL;
77 :
78 : #ifdef USE_ANONYMOUS_SHMEM
79 : static Size AnonymousShmemSize;
80 : static void *AnonymousShmem = NULL;
81 : #endif
82 :
83 : static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
84 : static void IpcMemoryDetach(int status, Datum shmaddr);
85 : static void IpcMemoryDelete(int status, Datum shmId);
86 : static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
87 : IpcMemoryId *shmid);
88 :
89 :
90 : /*
91 : * InternalIpcMemoryCreate(memKey, size)
92 : *
93 : * Attempt to create a new shared memory segment with the specified key.
94 : * Will fail (return NULL) if such a segment already exists. If successful,
95 : * attach the segment to the current process and return its attached address.
96 : * On success, on_shmem_exit callbacks are also registered to detach and
97 : * delete the segment at process exit.
98 : *
99 : * If we fail with a failure code other than collision-with-existing-segment,
100 : * print out an error and abort. Other types of errors are not recoverable.
101 : */
102 : static void *
103 5 : InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
104 : {
105 : IpcMemoryId shmid;
106 5 : void *requestedAddress = NULL;
107 : void *memAddress;
108 :
109 : /*
110 : * Normally we just pass requestedAddress = NULL to shmat(), allowing the
111 : * system to choose where the segment gets mapped. But in an EXEC_BACKEND
112 : * build, it's possible for whatever is chosen in the postmaster to not
113 : * work for backends, due to variations in address space layout. As a
114 : * rather klugy workaround, allow the user to specify the address to use
115 : * via setting the environment variable PG_SHMEM_ADDR. (If this were of
116 : * interest for anything except debugging, we'd probably create a cleaner
117 : * and better-documented way to set it, such as a GUC.)
118 : */
119 : #ifdef EXEC_BACKEND
120 : {
121 : char *pg_shmem_addr = getenv("PG_SHMEM_ADDR");
122 :
123 : if (pg_shmem_addr)
124 : requestedAddress = (void *) strtoul(pg_shmem_addr, NULL, 0);
125 : }
126 : #endif
127 :
128 5 : shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
129 :
130 5 : if (shmid < 0)
131 : {
132 0 : int shmget_errno = errno;
133 :
134 : /*
135 : * Fail quietly if error indicates a collision with existing segment.
136 : * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
137 : * we could get a permission violation instead? Also, EIDRM might
138 : * occur if an old seg is slated for destruction but not gone yet.
139 : */
140 0 : if (shmget_errno == EEXIST || shmget_errno == EACCES
141 : #ifdef EIDRM
142 0 : || shmget_errno == EIDRM
143 : #endif
144 : )
145 0 : return NULL;
146 :
147 : /*
148 : * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
149 : * there is an existing segment but it's smaller than "size" (this is
150 : * a result of poorly-thought-out ordering of error tests). To
151 : * distinguish between collision and invalid size in such cases, we
152 : * make a second try with size = 0. These kernels do not test size
153 : * against SHMMIN in the preexisting-segment case, so we will not get
154 : * EINVAL a second time if there is such a segment.
155 : */
156 0 : if (shmget_errno == EINVAL)
157 : {
158 0 : shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);
159 :
160 0 : if (shmid < 0)
161 : {
162 : /* As above, fail quietly if we verify a collision */
163 0 : if (errno == EEXIST || errno == EACCES
164 : #ifdef EIDRM
165 0 : || errno == EIDRM
166 : #endif
167 : )
168 0 : return NULL;
169 : /* Otherwise, fall through to report the original error */
170 : }
171 : else
172 : {
173 : /*
174 : * On most platforms we cannot get here because SHMMIN is
175 : * greater than zero. However, if we do succeed in creating a
176 : * zero-size segment, free it and then fall through to report
177 : * the original error.
178 : */
179 0 : if (shmctl(shmid, IPC_RMID, NULL) < 0)
180 0 : elog(LOG, "shmctl(%d, %d, 0) failed: %m",
181 : (int) shmid, IPC_RMID);
182 : }
183 : }
184 :
185 : /*
186 : * Else complain and abort.
187 : *
188 : * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
189 : * is violated. SHMALL violation might be reported as either ENOMEM
190 : * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
191 : * it should be. SHMMNI violation is ENOSPC, per spec. Just plain
192 : * not-enough-RAM is ENOMEM.
193 : */
194 0 : errno = shmget_errno;
195 0 : ereport(FATAL,
196 : (errmsg("could not create shared memory segment: %m"),
197 : errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
198 : (unsigned long) memKey, size,
199 : IPC_CREAT | IPC_EXCL | IPCProtection),
200 : (shmget_errno == EINVAL) ?
201 : errhint("This error usually means that PostgreSQL's request for a shared memory "
202 : "segment exceeded your kernel's SHMMAX parameter, or possibly that "
203 : "it is less than "
204 : "your kernel's SHMMIN parameter.\n"
205 : "The PostgreSQL documentation contains more information about shared "
206 : "memory configuration.") : 0,
207 : (shmget_errno == ENOMEM) ?
208 : errhint("This error usually means that PostgreSQL's request for a shared "
209 : "memory segment exceeded your kernel's SHMALL parameter. You might need "
210 : "to reconfigure the kernel with larger SHMALL.\n"
211 : "The PostgreSQL documentation contains more information about shared "
212 : "memory configuration.") : 0,
213 : (shmget_errno == ENOSPC) ?
214 : errhint("This error does *not* mean that you have run out of disk space. "
215 : "It occurs either if all available shared memory IDs have been taken, "
216 : "in which case you need to raise the SHMMNI parameter in your kernel, "
217 : "or because the system's overall limit for shared memory has been "
218 : "reached.\n"
219 : "The PostgreSQL documentation contains more information about shared "
220 : "memory configuration.") : 0));
221 : }
222 :
223 : /* Register on-exit routine to delete the new segment */
224 5 : on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));
225 :
226 : /* OK, should be able to attach to the segment */
227 5 : memAddress = shmat(shmid, requestedAddress, PG_SHMAT_FLAGS);
228 :
229 5 : if (memAddress == (void *) -1)
230 0 : elog(FATAL, "shmat(id=%d, addr=%p, flags=0x%x) failed: %m",
231 : shmid, requestedAddress, PG_SHMAT_FLAGS);
232 :
233 : /* Register on-exit routine to detach new segment before deleting */
234 5 : on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
235 :
236 : /*
237 : * Store shmem key and ID in data directory lockfile. Format to try to
238 : * keep it the same length always (trailing junk in the lockfile won't
239 : * hurt, but might confuse humans).
240 : */
241 : {
242 : char line[64];
243 :
244 5 : sprintf(line, "%9lu %9lu",
245 : (unsigned long) memKey, (unsigned long) shmid);
246 5 : AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line);
247 : }
248 :
249 5 : return memAddress;
250 : }
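/*
 * Illustrative sketch, not part of the PostgreSQL sources: how a caller can
 * rely on the IPC_EXCL behavior above, where EEXIST (or EACCES/EIDRM) means
 * "key already taken, try the next one".  probe_keys() is a hypothetical
 * helper; the real key-scanning loop lives in PGSharedMemoryCreate() below.
 */
#include <errno.h>
#include <stddef.h>
#include <sys/ipc.h>
#include <sys/shm.h>

static int
probe_keys(key_t start, size_t size, int max_tries)
{
    int         i;

    for (i = 0; i < max_tries; i++)
    {
        int         shmid = shmget(start + i, size,
                                   IPC_CREAT | IPC_EXCL | 0600);

        if (shmid >= 0)
            return shmid;       /* created a fresh segment under this key */
        if (errno != EEXIST && errno != EACCES)
            return -1;          /* a real error, not merely a collision */
        /* otherwise the key is in use; fall through and try the next one */
    }
    return -1;                  /* every candidate key was taken */
}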
251 :
252 : /****************************************************************************/
253 : /* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
254 : /* from the process's address space */
255 : /* (called as an on_shmem_exit callback, hence funny argument list) */
256 : /****************************************************************************/
257 : static void
258 5 : IpcMemoryDetach(int status, Datum shmaddr)
259 : {
260 : /* Detach System V shared memory block. */
261 5 : if (shmdt(DatumGetPointer(shmaddr)) < 0)
262 0 : elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
263 5 : }
264 :
265 : /****************************************************************************/
266 : /* IpcMemoryDelete(status, shmId) deletes a shared memory segment */
267 : /* (called as an on_shmem_exit callback, hence funny argument list) */
268 : /****************************************************************************/
269 : static void
270 5 : IpcMemoryDelete(int status, Datum shmId)
271 : {
272 5 : if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)
273 0 : elog(LOG, "shmctl(%d, %d, 0) failed: %m",
274 : DatumGetInt32(shmId), IPC_RMID);
275 5 : }
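/*
 * Illustrative sketch, not part of the PostgreSQL sources (it assumes the
 * same headers this file already includes): every on_shmem_exit callback has
 * the signature (int code, Datum arg), so whatever state it needs is packed
 * into a Datum at registration time, just as the two routines above do.
 * Both names below are hypothetical.
 */
static void
sketch_exit_callback(int code, Datum arg)
{
    int         shmid = DatumGetInt32(arg);     /* unpack the registered value */

    (void) shmid;               /* a real callback would clean up here */
}

static void
sketch_register_cleanup(int shmid)
{
    on_shmem_exit(sketch_exit_callback, Int32GetDatum(shmid));
}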
276 :
277 : /*
278 : * PGSharedMemoryIsInUse
279 : *
280 : * Is a previously-existing shmem segment still existing and in use?
281 : *
282 : * The point of this exercise is to detect the case where a prior postmaster
283 : * crashed, but it left child backends that are still running. Therefore
284 : * we only care about shmem segments that are associated with the intended
285 : * DataDir. This is an important consideration since accidental matches of
286 : * shmem segment IDs are reasonably common.
287 : */
288 : bool
289 0 : PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
290 : {
291 0 : IpcMemoryId shmId = (IpcMemoryId) id2;
292 : struct shmid_ds shmStat;
293 : struct stat statbuf;
294 : PGShmemHeader *hdr;
295 :
296 : /*
297 : * We detect whether a shared memory segment is in use by seeing whether
298 : * it (a) exists and (b) has any processes attached to it.
299 : */
300 0 : if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
301 : {
302 : /*
303 : * EINVAL actually has multiple possible causes documented in the
304 : * shmctl man page, but we assume it must mean the segment no longer
305 : * exists.
306 : */
307 0 : if (errno == EINVAL)
308 0 : return false;
309 :
310 : /*
311 : * EACCES implies that the segment belongs to some other userid, which
312 : * means it is not a Postgres shmem segment (or at least, not one that
313 : * is relevant to our data directory).
314 : */
315 0 : if (errno == EACCES)
316 0 : return false;
317 :
318 : /*
319 : * Some Linux kernel versions (in fact, all of them as of July 2007)
320 : * sometimes return EIDRM when EINVAL is correct. The Linux kernel
321 : * actually does not have any internal state that would justify
322 : * returning EIDRM, so we can get away with assuming that EIDRM is
323 : * equivalent to EINVAL on that platform.
324 : */
325 : #ifdef HAVE_LINUX_EIDRM_BUG
326 0 : if (errno == EIDRM)
327 0 : return false;
328 : #endif
329 :
330 : /*
331 : * Otherwise, we had better assume that the segment is in use. The
332 : * only likely case is EIDRM, which implies that the segment has been
333 : * IPC_RMID'd but there are still processes attached to it.
334 : */
335 0 : return true;
336 : }
337 :
338 : /* If it has no attached processes, it's not in use */
339 0 : if (shmStat.shm_nattch == 0)
340 0 : return false;
341 :
342 : /*
343 : * Try to attach to the segment and see if it matches our data directory.
344 : * This avoids shmid-conflict problems on machines that are running
345 : * several postmasters under the same userid.
346 : */
347 0 : if (stat(DataDir, &statbuf) < 0)
348 0 : return true; /* if can't stat, be conservative */
349 :
350 0 : hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS);
351 :
352 0 : if (hdr == (PGShmemHeader *) -1)
353 0 : return true; /* if can't attach, be conservative */
354 :
355 0 : if (hdr->magic != PGShmemMagic ||
356 0 : hdr->device != statbuf.st_dev ||
357 0 : hdr->inode != statbuf.st_ino)
358 : {
359 : /*
360 : * It's either not a Postgres segment, or not one for my data
361 : * directory. In either case it poses no threat.
362 : */
363 0 : shmdt((void *) hdr);
364 0 : return false;
365 : }
366 :
367 : /* Trouble --- looks a lot like there's still live backends */
368 0 : shmdt((void *) hdr);
369 :
370 0 : return true;
371 : }
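/*
 * Illustrative sketch, not part of the PostgreSQL sources: the id1/id2 values
 * come from the shared-memory line that InternalIpcMemoryCreate() writes into
 * the data directory lockfile ("%9lu %9lu": key, then shmid).  Parsing such a
 * line could look like this; the helper name is hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

static bool
parse_shmem_lockfile_line(const char *line,
                          unsigned long *id1, unsigned long *id2)
{
    /* Two decimal fields, as formatted by InternalIpcMemoryCreate() above. */
    return sscanf(line, "%lu %lu", id1, id2) == 2;
}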
372 :
373 : #ifdef USE_ANONYMOUS_SHMEM
374 :
375 : #ifdef MAP_HUGETLB
376 :
377 : /*
378 : * Identify the huge page size to use.
379 : *
380 : * Some Linux kernel versions have a bug causing mmap() to fail on requests
381 : * that are not a multiple of the hugepage size. Versions without that bug
382 : * instead silently round the request up to the next hugepage multiple ---
383 : * and then munmap() fails when we give it a size different from that.
384 : * So we have to round our request up to a multiple of the actual hugepage
385 : * size to avoid trouble.
386 : *
387 : * Doing the round-up ourselves also lets us make use of the extra memory,
388 : * rather than just wasting it. Currently, we just increase the available
389 : * space recorded in the shmem header, which will make the extra usable for
390 : * purposes such as additional locktable entries. Someday, for very large
391 : * hugepage sizes, we might want to think about more invasive strategies,
392 : * such as increasing shared_buffers to absorb the extra space.
393 : *
394 : * Returns the (real or assumed) page size into *hugepagesize,
395 : * and the hugepage-related mmap flags to use into *mmap_flags.
396 : *
397 : * Currently *mmap_flags is always just MAP_HUGETLB. Someday, on systems
398 : * that support it, we might OR in additional bits to specify a particular
399 : * non-default huge page size.
400 : */
401 : static void
402 5 : GetHugePageSize(Size *hugepagesize, int *mmap_flags)
403 : {
404 : /*
405 : * If we fail to find out the system's default huge page size, assume it
406 : * is 2MB. This will work fine when the actual size is less. If it's
407 : * more, we might get mmap() or munmap() failures due to unaligned
408 : * requests; but at this writing, there are no reports of any non-Linux
409 : * systems being picky about that.
410 : */
411 5 : *hugepagesize = 2 * 1024 * 1024;
412 5 : *mmap_flags = MAP_HUGETLB;
413 :
414 : /*
415 : * System-dependent code to find out the default huge page size.
416 : *
417 : * On Linux, read /proc/meminfo looking for a line like "Hugepagesize:
418 : * nnnn kB". Ignore any failures, falling back to the preset default.
419 : */
420 : #ifdef __linux__
421 : {
422 5 : FILE *fp = AllocateFile("/proc/meminfo", "r");
423 : char buf[128];
424 : unsigned int sz;
425 : char ch;
426 :
427 5 : if (fp)
428 : {
429 230 : while (fgets(buf, sizeof(buf), fp))
430 : {
431 225 : if (sscanf(buf, "Hugepagesize: %u %c", &sz, &ch) == 2)
432 : {
433 5 : if (ch == 'k')
434 : {
435 5 : *hugepagesize = sz * (Size) 1024;
436 5 : break;
437 : }
438 : /* We could accept other units besides kB, if needed */
439 : }
440 : }
441 5 : FreeFile(fp);
442 : }
443 : }
444 : #endif /* __linux__ */
445 5 : }
446 :
447 : #endif /* MAP_HUGETLB */
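/*
 * Illustrative sketch, not part of the PostgreSQL sources: the round-up that
 * CreateAnonymousSegment() below performs with the size reported here.  For
 * example, a 147 MB request with 2 MB huge pages is rounded up to 148 MB,
 * and the surplus is later made usable by passing the new size back via
 * *size.
 */
#include <stddef.h>

static size_t
round_up_to_hugepage(size_t request, size_t hugepagesize)
{
    if (request % hugepagesize != 0)
        request += hugepagesize - (request % hugepagesize);
    return request;             /* e.g. 147 MB -> 148 MB with 2 MB pages */
}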
448 :
449 : /*
450 : * Creates an anonymous mmap()ed shared memory segment.
451 : *
452 : * Pass the requested size in *size. This function will modify *size to the
453 : * actual size of the allocation, if it ends up allocating a segment that is
454 : * larger than requested.
455 : */
456 : static void *
457 5 : CreateAnonymousSegment(Size *size)
458 : {
459 5 : Size allocsize = *size;
460 5 : void *ptr = MAP_FAILED;
461 5 : int mmap_errno = 0;
462 :
463 : #ifndef MAP_HUGETLB
464 : /* PGSharedMemoryCreate should have dealt with this case */
465 : Assert(huge_pages != HUGE_PAGES_ON);
466 : #else
467 5 : if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
468 : {
469 : /*
470 : * Round up the request size to a suitable large value.
471 : */
472 : Size hugepagesize;
473 : int mmap_flags;
474 :
475 5 : GetHugePageSize(&hugepagesize, &mmap_flags);
476 :
477 5 : if (allocsize % hugepagesize != 0)
478 5 : allocsize += hugepagesize - (allocsize % hugepagesize);
479 :
480 5 : ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
481 : PG_MMAP_FLAGS | mmap_flags, -1, 0);
482 5 : mmap_errno = errno;
483 5 : if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
484 5 : elog(DEBUG1, "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m",
485 : allocsize);
486 : }
487 : #endif
488 :
489 5 : if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON)
490 : {
491 : /*
492 : * Use the original size, not the rounded-up value, when falling back
493 : * to non-huge pages.
494 : */
495 5 : allocsize = *size;
496 5 : ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
497 : PG_MMAP_FLAGS, -1, 0);
498 5 : mmap_errno = errno;
499 : }
500 :
501 5 : if (ptr == MAP_FAILED)
502 : {
503 0 : errno = mmap_errno;
504 0 : ereport(FATAL,
505 : (errmsg("could not map anonymous shared memory: %m"),
506 : (mmap_errno == ENOMEM) ?
507 : errhint("This error usually means that PostgreSQL's request "
508 : "for a shared memory segment exceeded available memory, "
509 : "swap space, or huge pages. To reduce the request size "
510 : "(currently %zu bytes), reduce PostgreSQL's shared "
511 : "memory usage, perhaps by reducing shared_buffers or "
512 : "max_connections.",
513 : *size) : 0));
514 : }
515 :
516 5 : *size = allocsize;
517 5 : return ptr;
518 : }
519 :
520 : /*
521 : * AnonymousShmemDetach --- detach from an anonymous mmap'd block
522 : * (called as an on_shmem_exit callback, hence funny argument list)
523 : */
524 : static void
525 5 : AnonymousShmemDetach(int status, Datum arg)
526 : {
527 : /* Release anonymous shared memory block, if any. */
528 5 : if (AnonymousShmem != NULL)
529 : {
530 5 : if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
531 0 : elog(LOG, "munmap(%p, %zu) failed: %m",
532 : AnonymousShmem, AnonymousShmemSize);
533 5 : AnonymousShmem = NULL;
534 : }
535 5 : }
536 :
537 : #endif /* USE_ANONYMOUS_SHMEM */
538 :
539 : /*
540 : * PGSharedMemoryCreate
541 : *
542 : * Create a shared memory segment of the given size and initialize its
543 : * standard header. Also, register an on_shmem_exit callback to release
544 : * the storage.
545 : *
546 : * Dead Postgres segments are recycled if found, but we do not fail upon
547 : * collision with non-Postgres shmem segments. The idea here is to detect and
548 : * re-use keys that may have been assigned by a crashed postmaster or backend.
549 : *
550 : * makePrivate means to always create a new segment, rather than attach to
551 : * or recycle any existing segment.
552 : *
553 : * The port number is passed for possible use as a key (for SysV, we use
554 : * it to generate the starting shmem key). In a standalone backend,
555 : * zero will be passed.
556 : */
557 : PGShmemHeader *
558 5 : PGSharedMemoryCreate(Size size, bool makePrivate, int port,
559 : PGShmemHeader **shim)
560 : {
561 : IpcMemoryKey NextShmemSegID;
562 : void *memAddress;
563 : PGShmemHeader *hdr;
564 : IpcMemoryId shmid;
565 : struct stat statbuf;
566 : Size sysvsize;
567 :
568 : /* Complain if hugepages demanded but we can't possibly support them */
569 : #if !defined(USE_ANONYMOUS_SHMEM) || !defined(MAP_HUGETLB)
570 : if (huge_pages == HUGE_PAGES_ON)
571 : ereport(ERROR,
572 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
573 : errmsg("huge pages not supported on this platform")));
574 : #endif
575 :
576 : /* Room for a header? */
577 5 : Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
578 :
579 : #ifdef USE_ANONYMOUS_SHMEM
580 5 : AnonymousShmem = CreateAnonymousSegment(&size);
581 5 : AnonymousShmemSize = size;
582 :
583 : /* Register on-exit routine to unmap the anonymous segment */
584 5 : on_shmem_exit(AnonymousShmemDetach, (Datum) 0);
585 :
586 : /* Now we need only allocate a minimal-sized SysV shmem block. */
587 5 : sysvsize = sizeof(PGShmemHeader);
588 : #else
589 : sysvsize = size;
590 : #endif
591 :
592 : /* Make sure PGSharedMemoryAttach doesn't fail without need */
593 5 : UsedShmemSegAddr = NULL;
594 :
595 : /* Loop till we find a free IPC key */
596 5 : NextShmemSegID = port * 1000;
597 :
598 5 : for (NextShmemSegID++;; NextShmemSegID++)
599 : {
600 : /* Try to create new segment */
601 5 : memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
602 5 : if (memAddress)
603 5 : break; /* successful create and attach */
604 :
605 : /* Check shared memory and possibly remove and recreate */
606 :
607 0 : if (makePrivate) /* a standalone backend shouldn't do this */
608 0 : continue;
609 :
610 0 : if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL)
611 0 : continue; /* can't attach, not one of mine */
612 :
613 : /*
614 : * If I am not the creator and it belongs to an extant process,
615 : * continue.
616 : */
617 0 : hdr = (PGShmemHeader *) memAddress;
618 0 : if (hdr->creatorPID != getpid())
619 : {
620 0 : if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH)
621 : {
622 0 : shmdt(memAddress);
623 0 : continue; /* segment belongs to a live process */
624 : }
625 : }
626 :
627 : /*
628 : * The segment appears to be from a dead Postgres process, or from a
629 : * previous cycle of life in this same process. Zap it, if possible,
630 : * and any associated dynamic shared memory segments, as well. This
631 : * probably shouldn't fail, but if it does, assume the segment belongs
632 : * to someone else after all, and continue quietly.
633 : */
634 0 : if (hdr->dsm_control != 0)
635 0 : dsm_cleanup_using_control_segment(hdr->dsm_control);
636 0 : shmdt(memAddress);
637 0 : if (shmctl(shmid, IPC_RMID, NULL) < 0)
638 0 : continue;
639 :
640 : /*
641 : * Now try again to create the segment.
642 : */
643 0 : memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
644 0 : if (memAddress)
645 0 : break; /* successful create and attach */
646 :
647 : /*
648 : * Can only get here if some other process managed to create the same
649 : * shmem key before we did. Let him have that one, loop around to try
650 : * next key.
651 : */
652 0 : }
653 :
654 : /*
655 : * OK, we created a new segment. Mark it as created by this process. The
656 : * order of assignments here is critical so that another Postgres process
657 : * can't see the header as valid but belonging to an invalid PID!
658 : */
659 5 : hdr = (PGShmemHeader *) memAddress;
660 5 : hdr->creatorPID = getpid();
661 5 : hdr->magic = PGShmemMagic;
662 5 : hdr->dsm_control = 0;
663 :
664 : /* Fill in the data directory ID info, too */
665 5 : if (stat(DataDir, &statbuf) < 0)
666 0 : ereport(FATAL,
667 : (errcode_for_file_access(),
668 : errmsg("could not stat data directory \"%s\": %m",
669 : DataDir)));
670 5 : hdr->device = statbuf.st_dev;
671 5 : hdr->inode = statbuf.st_ino;
672 :
673 : /*
674 : * Initialize space allocation status for segment.
675 : */
676 5 : hdr->totalsize = size;
677 5 : hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
678 5 : *shim = hdr;
679 :
680 : /* Save info for possible future use */
681 5 : UsedShmemSegAddr = memAddress;
682 5 : UsedShmemSegID = (unsigned long) NextShmemSegID;
683 :
684 : /*
685 : * If AnonymousShmem is NULL here, then we're not using anonymous shared
686 : * memory, and should return a pointer to the System V shared memory
687 : * block. Otherwise, the System V shared memory block is only a shim, and
688 : * we must return a pointer to the real block.
689 : */
690 : #ifdef USE_ANONYMOUS_SHMEM
691 5 : if (AnonymousShmem == NULL)
692 0 : return hdr;
693 5 : memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
694 5 : return (PGShmemHeader *) AnonymousShmem;
695 : #else
696 : return hdr;
697 : #endif
698 : }
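/*
 * Illustrative sketch, not part of the PostgreSQL sources: the key search
 * above starts at port * 1000 and pre-increments, so a postmaster on the
 * default port 5432 first tries key 5432001, then 5432002, and so on until
 * InternalIpcMemoryCreate() succeeds.  The helper name is hypothetical.
 */
#include <sys/ipc.h>

static key_t
first_shmem_key_for_port(int port)
{
    return (key_t) (port * 1000 + 1);   /* 5432 -> 5432001 */
}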
699 :
700 : #ifdef EXEC_BACKEND
701 :
702 : /*
703 : * PGSharedMemoryReAttach
704 : *
705 : * This is called during startup of a postmaster child process to re-attach to
706 : * an already existing shared memory segment. This is needed only in the
707 : * EXEC_BACKEND case; otherwise postmaster children inherit the shared memory
708 : * segment attachment via fork().
709 : *
710 : * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
711 : * routine. The caller must have already restored them to the postmaster's
712 : * values.
713 : */
714 : void
715 : PGSharedMemoryReAttach(void)
716 : {
717 : IpcMemoryId shmid;
718 : void *hdr;
719 : void *origUsedShmemSegAddr = UsedShmemSegAddr;
720 :
721 : Assert(UsedShmemSegAddr != NULL);
722 : Assert(IsUnderPostmaster);
723 :
724 : #ifdef __CYGWIN__
725 : /* cygipc (currently) appears to not detach on exec. */
726 : PGSharedMemoryDetach();
727 : UsedShmemSegAddr = origUsedShmemSegAddr;
728 : #endif
729 :
730 : elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
731 : hdr = (void *) PGSharedMemoryAttach((IpcMemoryKey) UsedShmemSegID, &shmid);
732 : if (hdr == NULL)
733 : elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
734 : (int) UsedShmemSegID, UsedShmemSegAddr);
735 : if (hdr != origUsedShmemSegAddr)
736 : elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
737 : hdr, origUsedShmemSegAddr);
738 : dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control);
739 :
740 : UsedShmemSegAddr = hdr; /* probably redundant */
741 : }
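/*
 * Illustrative sketch, not part of the PostgreSQL sources: attaching a SysV
 * segment at a caller-chosen address, as the reattach path above does with
 * UsedShmemSegAddr.  With a non-NULL address and no SHM_RND, shmat() either
 * attaches exactly there or fails, so comparing the result to the request
 * (as the code above does) is a cheap sanity check.
 */
#include <stdbool.h>
#include <sys/ipc.h>
#include <sys/shm.h>

static bool
attach_at_address(int shmid, void *requested)
{
    void       *got = shmat(shmid, requested, 0);

    if (got == (void *) -1)
        return false;           /* attach failed outright */
    return got == requested;    /* true only if we landed where we asked */
}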
742 :
743 : /*
744 : * PGSharedMemoryNoReAttach
745 : *
746 : * This is called during startup of a postmaster child process when we choose
747 : * *not* to re-attach to the existing shared memory segment. We must clean up
748 : * to leave things in the appropriate state. Like PGSharedMemoryReAttach,
749 : * this is used only in the EXEC_BACKEND case.
750 : *
751 : * The child process startup logic might or might not call PGSharedMemoryDetach
752 : * after this; make sure that it will be a no-op if called.
753 : *
754 : * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
755 : * routine. The caller must have already restored them to the postmaster's
756 : * values.
757 : */
758 : void
759 : PGSharedMemoryNoReAttach(void)
760 : {
761 : Assert(UsedShmemSegAddr != NULL);
762 : Assert(IsUnderPostmaster);
763 :
764 : #ifdef __CYGWIN__
765 : /* cygipc (currently) appears to not detach on exec. */
766 : PGSharedMemoryDetach();
767 : #endif
768 :
769 : /* For cleanliness, reset UsedShmemSegAddr to show we're not attached. */
770 : UsedShmemSegAddr = NULL;
771 : /* And the same for UsedShmemSegID. */
772 : UsedShmemSegID = 0;
773 : }
774 :
775 : #endif /* EXEC_BACKEND */
776 :
777 : /*
778 : * PGSharedMemoryDetach
779 : *
780 : * Detach from the shared memory segment, if still attached. This is not
781 : * intended to be called explicitly by the process that originally created the
782 : * segment (it will have on_shmem_exit callback(s) registered to do that).
783 : * Rather, this is for subprocesses that have inherited an attachment and want
784 : * to get rid of it.
785 : *
786 : * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
787 : * routine, as are AnonymousShmem and AnonymousShmemSize.
788 : */
789 : void
790 1 : PGSharedMemoryDetach(void)
791 : {
792 1 : if (UsedShmemSegAddr != NULL)
793 : {
794 1 : if ((shmdt(UsedShmemSegAddr) < 0)
795 : #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
796 : /* Work-around for cygipc exec bug */
797 : && shmdt(NULL) < 0
798 : #endif
799 : )
800 0 : elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
801 1 : UsedShmemSegAddr = NULL;
802 : }
803 :
804 : #ifdef USE_ANONYMOUS_SHMEM
805 1 : if (AnonymousShmem != NULL)
806 : {
807 1 : if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
808 0 : elog(LOG, "munmap(%p, %zu) failed: %m",
809 : AnonymousShmem, AnonymousShmemSize);
810 1 : AnonymousShmem = NULL;
811 : }
812 : #endif
813 1 : }
814 :
815 :
816 : /*
817 : * Attach to shared memory and make sure it has a Postgres header
818 : *
819 : * Returns attach address if OK, else NULL
820 : */
821 : static PGShmemHeader *
822 0 : PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid)
823 : {
824 : PGShmemHeader *hdr;
825 :
826 0 : if ((*shmid = shmget(key, sizeof(PGShmemHeader), 0)) < 0)
827 0 : return NULL;
828 :
829 0 : hdr = (PGShmemHeader *) shmat(*shmid, UsedShmemSegAddr, PG_SHMAT_FLAGS);
830 :
831 0 : if (hdr == (PGShmemHeader *) -1)
832 0 : return NULL; /* failed: must be some other app's */
833 :
834 0 : if (hdr->magic != PGShmemMagic)
835 : {
836 0 : shmdt((void *) hdr);
837 0 : return NULL; /* segment belongs to a non-Postgres app */
838 : }
839 :
840 0 : return hdr;
841 : }