LCOV - code coverage report
Current view: top level - src/backend/port - pg_shmem.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 88 164 53.7 %
Date: 2017-09-29 15:12:54 Functions: 8 10 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * sysv_shmem.c
       4             :  *    Implement shared memory using SysV facilities
       5             :  *
       6             :  * These routines used to be a fairly thin layer on top of SysV shared
       7             :  * memory functionality.  With the addition of anonymous-shmem logic,
       8             :  * they're a bit fatter now.  We still require a SysV shmem block to
       9             :  * exist, though, because mmap'd shmem provides no way to find out how
      10             :  * many processes are attached, which we need for interlocking purposes.
      11             :  *
      12             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      13             :  * Portions Copyright (c) 1994, Regents of the University of California
      14             :  *
      15             :  * IDENTIFICATION
      16             :  *    src/backend/port/sysv_shmem.c
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include <signal.h>
      23             : #include <unistd.h>
      24             : #include <sys/file.h>
      25             : #include <sys/mman.h>
      26             : #include <sys/stat.h>
      27             : #ifdef HAVE_SYS_IPC_H
      28             : #include <sys/ipc.h>
      29             : #endif
      30             : #ifdef HAVE_SYS_SHM_H
      31             : #include <sys/shm.h>
      32             : #endif
      33             : 
      34             : #include "miscadmin.h"
      35             : #include "portability/mem.h"
      36             : #include "storage/dsm.h"
      37             : #include "storage/fd.h"
      38             : #include "storage/ipc.h"
      39             : #include "storage/pg_shmem.h"
      40             : #include "utils/guc.h"
      41             : #include "utils/pidfile.h"
      42             : 
      43             : 
      44             : /*
      45             :  * As of PostgreSQL 9.3, we normally allocate only a very small amount of
      46             :  * System V shared memory, and only for the purposes of providing an
      47             :  * interlock to protect the data directory.  The real shared memory block
      48             :  * is allocated using mmap().  This works around the problem that many
      49             :  * systems have very low limits on the amount of System V shared memory
      50             :  * that can be allocated.  Even a limit of a few megabytes will be enough
      51             :  * to run many copies of PostgreSQL without needing to adjust system settings.
      52             :  *
      53             :  * We assume that no one will attempt to run PostgreSQL 9.3 or later on
      54             :  * systems that are ancient enough that anonymous shared memory is not
      55             :  * supported, such as pre-2.4 versions of Linux.  If that turns out to be
      56             :  * false, we might need to add compile and/or run-time tests here and do this
      57             :  * only if the running kernel supports it.
      58             :  *
      59             :  * However, we must always disable this logic in the EXEC_BACKEND case, and
      60             :  * fall back to the old method of allocating the entire segment using System V
      61             :  * shared memory, because there's no way to attach an anonymous mmap'd segment
      62             :  * to a process after exec().  Since EXEC_BACKEND is intended only for
      63             :  * developer use, this shouldn't be a big problem.  Because of this, we do
      64             :  * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
      65             :  */
      66             : #ifndef EXEC_BACKEND
      67             : #define USE_ANONYMOUS_SHMEM
      68             : #endif
      69             : 
      70             : 
      71             : typedef key_t IpcMemoryKey;     /* shared memory key passed to shmget(2) */
      72             : typedef int IpcMemoryId;        /* shared memory ID returned by shmget(2) */
      73             : 
      74             : 
      75             : unsigned long UsedShmemSegID = 0;
      76             : void       *UsedShmemSegAddr = NULL;
      77             : 
      78             : #ifdef USE_ANONYMOUS_SHMEM
      79             : static Size AnonymousShmemSize; /* length handed to munmap() at detach */
      80             : static void *AnonymousShmem = NULL; /* mapping address; NULL = nothing to unmap */
      81             : #endif
      82             : 
      83             : static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
      84             : static void IpcMemoryDetach(int status, Datum shmaddr); /* on_shmem_exit callback */
      85             : static void IpcMemoryDelete(int status, Datum shmId);   /* on_shmem_exit callback */
      86             : static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
      87             :                      IpcMemoryId *shmid);
      88             : 
      89             : 
      90             : /*
      91             :  *  InternalIpcMemoryCreate(memKey, size)
      92             :  *
      93             :  * Attempt to create a new shared memory segment with the specified key.
      94             :  * Will fail (return NULL) if such a segment already exists.  If successful,
      95             :  * attach the segment, record its key and ID in the data directory lockfile,
      96             :  * and return the attached address.  On success, on_shmem_exit callbacks are
      97             :  * registered that detach and delete the segment when they are later run.
      98             :  *
      99             :  * If we fail with a failure code other than collision-with-existing-segment,
     100             :  * print out an error and abort.  Other types of errors are not recoverable.
     101             :  */
     102             : static void *
     103           5 : InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
     104             : {
     105             :     IpcMemoryId shmid;
     106           5 :     void       *requestedAddress = NULL;
     107             :     void       *memAddress;
     108             : 
     109             :     /*
     110             :      * Normally we just pass requestedAddress = NULL to shmat(), allowing the
     111             :      * system to choose where the segment gets mapped.  But in an EXEC_BACKEND
     112             :      * build, it's possible for whatever is chosen in the postmaster to not
     113             :      * work for backends, due to variations in address space layout.  As a
     114             :      * rather klugy workaround, allow the user to specify the address to use
     115             :      * via setting the environment variable PG_SHMEM_ADDR.  (If this were of
     116             :      * interest for anything except debugging, we'd probably create a cleaner
     117             :      * and better-documented way to set it, such as a GUC.)
     118             :      */
     119             : #ifdef EXEC_BACKEND
     120             :     {
     121             :         char       *pg_shmem_addr = getenv("PG_SHMEM_ADDR");
     122             : 
     123             :         if (pg_shmem_addr)
     124             :             requestedAddress = (void *) strtoul(pg_shmem_addr, NULL, 0);
     125             :     }
     126             : #endif
     127             : 
     128           5 :     shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
     129             : 
     130           5 :     if (shmid < 0)
     131             :     {
     132           0 :         int         shmget_errno = errno;   /* saved: calls below may change errno */
     133             : 
     134             :         /*
     135             :          * Fail quietly if error indicates a collision with existing segment.
     136             :          * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
     137             :          * we could get a permission violation instead?  Also, EIDRM might
     138             :          * occur if an old seg is slated for destruction but not gone yet.
     139             :          */
     140           0 :         if (shmget_errno == EEXIST || shmget_errno == EACCES
     141             : #ifdef EIDRM
     142           0 :             || shmget_errno == EIDRM
     143             : #endif
     144             :             )
     145           0 :             return NULL;
     146             : 
     147             :         /*
     148             :          * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
     149             :          * there is an existing segment but it's smaller than "size" (this is
     150             :          * a result of poorly-thought-out ordering of error tests). To
     151             :          * distinguish between collision and invalid size in such cases, we
     152             :          * make a second try with size = 0.  These kernels do not test size
     153             :          * against SHMMIN in the preexisting-segment case, so we will not get
     154             :          * EINVAL a second time if there is such a segment.
     155             :          */
     156           0 :         if (shmget_errno == EINVAL)
     157             :         {
     158           0 :             shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);
     159             : 
     160           0 :             if (shmid < 0)
     161             :             {
     162             :                 /* As above, fail quietly if we verify a collision */
     163           0 :                 if (errno == EEXIST || errno == EACCES
     164             : #ifdef EIDRM
     165           0 :                     || errno == EIDRM
     166             : #endif
     167             :                     )
     168           0 :                     return NULL;
     169             :                 /* Otherwise, fall through to report the original error */
     170             :             }
     171             :             else
     172             :             {
     173             :                 /*
     174             :                  * On most platforms we cannot get here because SHMMIN is
     175             :                  * greater than zero.  However, if we do succeed in creating a
     176             :                  * zero-size segment, free it and then fall through to report
     177             :                  * the original error.
     178             :                  */
     179           0 :                 if (shmctl(shmid, IPC_RMID, NULL) < 0)
     180           0 :                     elog(LOG, "shmctl(%d, %d, 0) failed: %m",
     181             :                          (int) shmid, IPC_RMID);
     182             :             }
     183             :         }
     184             : 
     185             :         /*
     186             :          * Else complain and abort.
     187             :          *
     188             :          * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
     189             :          * is violated.  SHMALL violation might be reported as either ENOMEM
     190             :          * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
     191             :          * it should be.  SHMMNI violation is ENOSPC, per spec.  Just plain
     192             :          * not-enough-RAM is ENOMEM.
     193             :          */
     194           0 :         errno = shmget_errno;   /* restore the first shmget() error for %m below */
     195           0 :         ereport(FATAL,
     196             :                 (errmsg("could not create shared memory segment: %m"),
     197             :                  errdetail("Failed system call was shmget(key=%lu, size=%zu, 0%o).",
     198             :                            (unsigned long) memKey, size,
     199             :                            IPC_CREAT | IPC_EXCL | IPCProtection),
     200             :                  (shmget_errno == EINVAL) ?
     201             :                  errhint("This error usually means that PostgreSQL's request for a shared memory "
     202             :                          "segment exceeded your kernel's SHMMAX parameter, or possibly that "
     203             :                          "it is less than "
     204             :                          "your kernel's SHMMIN parameter.\n"
     205             :                          "The PostgreSQL documentation contains more information about shared "
     206             :                          "memory configuration.") : 0,
     207             :                  (shmget_errno == ENOMEM) ?
     208             :                  errhint("This error usually means that PostgreSQL's request for a shared "
     209             :                          "memory segment exceeded your kernel's SHMALL parameter.  You might need "
     210             :                          "to reconfigure the kernel with larger SHMALL.\n"
     211             :                          "The PostgreSQL documentation contains more information about shared "
     212             :                          "memory configuration.") : 0,
     213             :                  (shmget_errno == ENOSPC) ?
     214             :                  errhint("This error does *not* mean that you have run out of disk space.  "
     215             :                          "It occurs either if all available shared memory IDs have been taken, "
     216             :                          "in which case you need to raise the SHMMNI parameter in your kernel, "
     217             :                          "or because the system's overall limit for shared memory has been "
     218             :                          "reached.\n"
     219             :                          "The PostgreSQL documentation contains more information about shared "
     220             :                          "memory configuration.") : 0));
     221             :     }
     222             : 
     223             :     /* Register on-exit routine to delete the new segment */
     224           5 :     on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));
     225             : 
     226             :     /* OK, should be able to attach to the segment */
     227           5 :     memAddress = shmat(shmid, requestedAddress, PG_SHMAT_FLAGS);
     228             : 
     229           5 :     if (memAddress == (void *) -1)
     230           0 :         elog(FATAL, "shmat(id=%d, addr=%p, flags=0x%x) failed: %m",
     231             :              shmid, requestedAddress, PG_SHMAT_FLAGS);
     232             : 
     233             :     /* Register on-exit routine to detach new segment before deleting */
     234           5 :     on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
     235             : 
     236             :     /*
     237             :      * Store shmem key and ID in data directory lockfile.  Format to try to
     238             :      * keep it the same length always (trailing junk in the lockfile won't
     239             :      * hurt, but might confuse humans).
     240             :      */
     241             :     {
     242             :         char        line[64];
     243             : 
     244           5 :         sprintf(line, "%9lu %9lu",
     245             :                 (unsigned long) memKey, (unsigned long) shmid);
     246           5 :         AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line);
     247             :     }
     248             : 
     249           5 :     return memAddress;
     250             : }
     251             : 
     252             : /****************************************************************************/
     253             : /*  IpcMemoryDetach(status, shmaddr)    removes a shared memory segment     */
     254             : /*                                      from process' address space         */
     255             : /*  (called as an on_shmem_exit callback; the status argument is unused)    */
     256             : /****************************************************************************/
     257             : static void
     258           5 : IpcMemoryDetach(int status, Datum shmaddr)
     259             : {
     260             :     /* Detach System V shared memory block; a failure is only logged. */
     261           5 :     if (shmdt(DatumGetPointer(shmaddr)) < 0)
     262           0 :         elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
     263           5 : }
     264             : 
     265             : /****************************************************************************/
     266             : /*  IpcMemoryDelete(status, shmId)      deletes a shared memory segment     */
     267             : /*  (called as an on_shmem_exit callback; the status argument is unused)    */
     268             : /****************************************************************************/
     269             : static void
     270           5 : IpcMemoryDelete(int status, Datum shmId)
     271             : {
     272           5 :     if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)   /* failure is only logged */
     273           0 :         elog(LOG, "shmctl(%d, %d, 0) failed: %m",
     274             :              DatumGetInt32(shmId), IPC_RMID);
     275           5 : }
     276             : 
     277             : /*
     278             :  * PGSharedMemoryIsInUse
     279             :  *
     280             :  * Is a previously-existing shmem segment still existing and in use?
     281             :  *
     282             :  * The point of this exercise is to detect the case where a prior postmaster
     283             :  * crashed, but it left child backends that are still running.  Therefore
     284             :  * we only care about shmem segments that are associated with the intended
     285             :  * DataDir.  This is an important consideration since accidental matches of
     286             :  * shmem segment IDs are reasonably common.
     287             :  */
     288             : bool
     289           0 : PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
     290             : {
     291           0 :     IpcMemoryId shmId = (IpcMemoryId) id2;  /* id1 is not examined here */
     292             :     struct shmid_ds shmStat;
     293             :     struct stat statbuf;
     294             :     PGShmemHeader *hdr;
     295             : 
     296             :     /*
     297             :      * We detect whether a shared memory segment is in use by seeing whether
     298             :      * it (a) exists and (b) has any processes attached to it.
     299             :      */
     300           0 :     if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
     301             :     {
     302             :         /*
     303             :          * EINVAL actually has multiple possible causes documented in the
     304             :          * shmctl man page, but we assume it must mean the segment no longer
     305             :          * exists.
     306             :          */
     307           0 :         if (errno == EINVAL)
     308           0 :             return false;
     309             : 
     310             :         /*
     311             :          * EACCES implies that the segment belongs to some other userid, which
     312             :          * means it is not a Postgres shmem segment (or at least, not one that
     313             :          * is relevant to our data directory).
     314             :          */
     315           0 :         if (errno == EACCES)
     316           0 :             return false;
     317             : 
     318             :         /*
     319             :          * Some Linux kernel versions (in fact, all of them as of July 2007)
     320             :          * sometimes return EIDRM when EINVAL is correct.  The Linux kernel
     321             :          * actually does not have any internal state that would justify
     322             :          * returning EIDRM, so we can get away with assuming that EIDRM is
     323             :          * equivalent to EINVAL on that platform.
     324             :          */
     325             : #ifdef HAVE_LINUX_EIDRM_BUG
     326           0 :         if (errno == EIDRM)
     327           0 :             return false;
     328             : #endif
     329             : 
     330             :         /*
     331             :          * Otherwise, we had better assume that the segment is in use. The
     332             :          * only likely case is EIDRM, which implies that the segment has been
     333             :          * IPC_RMID'd but there are still processes attached to it.
     334             :          */
     335           0 :         return true;
     336             :     }
     337             : 
     338             :     /* If it has no attached processes, it's not in use */
     339           0 :     if (shmStat.shm_nattch == 0)
     340           0 :         return false;
     341             : 
     342             :     /*
     343             :      * Try to attach to the segment and see if it matches our data directory.
     344             :      * This avoids shmid-conflict problems on machines that are running
     345             :      * several postmasters under the same userid.
     346             :      */
     347           0 :     if (stat(DataDir, &statbuf) < 0)
     348           0 :         return true;            /* if can't stat, be conservative */
     349             : 
     350           0 :     hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS);
     351             : 
     352           0 :     if (hdr == (PGShmemHeader *) -1)
     353           0 :         return true;            /* if can't attach, be conservative */
     354             : 
     355           0 :     if (hdr->magic != PGShmemMagic ||
     356           0 :         hdr->device != statbuf.st_dev ||
     357           0 :         hdr->inode != statbuf.st_ino)
     358             :     {
     359             :         /*
     360             :          * It's either not a Postgres segment, or not one for my data
     361             :          * directory.  In either case it poses no threat.
     362             :          */
     363           0 :         shmdt((void *) hdr);
     364           0 :         return false;
     365             :     }
     366             : 
     367             :     /* Trouble --- it looks a lot like there are still live backends */
     368           0 :     shmdt((void *) hdr);
     369             : 
     370           0 :     return true;
     371             : }
     372             : 
     373             : #ifdef USE_ANONYMOUS_SHMEM
     374             : 
     375             : #ifdef MAP_HUGETLB
     376             : 
     377             : /*
     378             :  * Identify the huge page size to use.
     379             :  *
     380             :  * Some Linux kernel versions have a bug causing mmap() to fail on requests
     381             :  * that are not a multiple of the hugepage size.  Versions without that bug
     382             :  * instead silently round the request up to the next hugepage multiple ---
     383             :  * and then munmap() fails when we give it a size different from that.
     384             :  * So we have to round our request up to a multiple of the actual hugepage
     385             :  * size to avoid trouble.
     386             :  *
     387             :  * Doing the round-up ourselves also lets us make use of the extra memory,
     388             :  * rather than just wasting it.  Currently, we just increase the available
     389             :  * space recorded in the shmem header, which will make the extra usable for
     390             :  * purposes such as additional locktable entries.  Someday, for very large
     391             :  * hugepage sizes, we might want to think about more invasive strategies,
     392             :  * such as increasing shared_buffers to absorb the extra space.
     393             :  *
     394             :  * Returns the (real or assumed) page size into *hugepagesize,
     395             :  * and the hugepage-related mmap flags to use into *mmap_flags.
     396             :  *
     397             :  * Currently *mmap_flags is always just MAP_HUGETLB.  Someday, on systems
     398             :  * that support it, we might OR in additional bits to specify a particular
     399             :  * non-default huge page size.
     400             :  */
     401             : static void
     402           5 : GetHugePageSize(Size *hugepagesize, int *mmap_flags)
     403             : {
     404             :     /*
     405             :      * If we fail to find out the system's default huge page size, assume it
     406             :      * is 2MB.  This will work fine when the actual size is less.  If it's
     407             :      * more, we might get mmap() or munmap() failures due to unaligned
     408             :      * requests; but at this writing, there are no reports of any non-Linux
     409             :      * systems being picky about that.
     410             :      */
     411           5 :     *hugepagesize = 2 * 1024 * 1024;
     412           5 :     *mmap_flags = MAP_HUGETLB;
     413             : 
     414             :     /*
     415             :      * System-dependent code to find out the default huge page size.
     416             :      *
     417             :      * On Linux, read /proc/meminfo looking for a line like "Hugepagesize:
     418             :      * nnnn kB".  Ignore any failures, falling back to the preset default.
     419             :      */
     420             : #ifdef __linux__
     421             :     {
     422           5 :         FILE       *fp = AllocateFile("/proc/meminfo", "r");
     423             :         char        buf[128];
     424             :         unsigned int sz;
     425             :         char        ch;
     426             : 
     427           5 :         if (fp)
     428             :         {
     429         230 :             while (fgets(buf, sizeof(buf), fp))
     430             :             {
     431         225 :                 if (sscanf(buf, "Hugepagesize: %u %c", &sz, &ch) == 2)
     432             :                 {
     433           5 :                     if (ch == 'k')  /* first character of the "kB" unit */
     434             :                     {
     435           5 :                         *hugepagesize = sz * (Size) 1024;   /* kB -> bytes */
     436           5 :                         break;
     437             :                     }
     438             :                     /* We could accept other units besides kB, if needed */
     439             :                 }
     440             :             }
     441           5 :             FreeFile(fp);
     442             :         }
     443             :     }
     444             : #endif                          /* __linux__ */
     445           5 : }
     446             : 
     447             : #endif                          /* MAP_HUGETLB */
     448             : 
     449             : /*
     450             :  * Creates an anonymous mmap()ed shared memory segment.
     451             :  *
     452             :  * Pass the requested size in *size.  This function will modify *size to the
     453             :  * actual size of the allocation, if it ends up allocating a segment that is
     454             :  * larger than requested.
     455             :  */
     456             : static void *
     457           5 : CreateAnonymousSegment(Size *size)
     458             : {
     459           5 :     Size        allocsize = *size;
     460           5 :     void       *ptr = MAP_FAILED;   /* stays MAP_FAILED until an mmap() succeeds */
     461           5 :     int         mmap_errno = 0;
     462             : 
     463             : #ifndef MAP_HUGETLB
     464             :     /* PGSharedMemoryCreate should have dealt with this case */
     465             :     Assert(huge_pages != HUGE_PAGES_ON);
     466             : #else
     467           5 :     if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
     468             :     {
     469             :         /*
     470             :          * Round up the request size to a multiple of the huge page size.
     471             :          */
     472             :         Size        hugepagesize;
     473             :         int         mmap_flags;
     474             : 
     475           5 :         GetHugePageSize(&hugepagesize, &mmap_flags);
     476             : 
     477           5 :         if (allocsize % hugepagesize != 0)
     478           5 :             allocsize += hugepagesize - (allocsize % hugepagesize);
     479             : 
     480           5 :         ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
     481             :                    PG_MMAP_FLAGS | mmap_flags, -1, 0);
     482           5 :         mmap_errno = errno;     /* saved for the FATAL report further down */
     483           5 :         if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED)
     484           5 :             elog(DEBUG1, "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m",
     485             :                  allocsize);
     486             :     }
     487             : #endif
     488             : 
     489           5 :     if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON)
     490             :     {
     491             :         /*
     492             :          * Use the original size, not the rounded-up value, when falling back
     493             :          * to non-huge pages.
     494             :          */
     495           5 :         allocsize = *size;
     496           5 :         ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
     497             :                    PG_MMAP_FLAGS, -1, 0);
     498           5 :         mmap_errno = errno;     /* replaces any errno saved from the huge-page try */
     499             :     }
     500             : 
     501           5 :     if (ptr == MAP_FAILED)
     502             :     {
     503           0 :         errno = mmap_errno;
     504           0 :         ereport(FATAL,
     505             :                 (errmsg("could not map anonymous shared memory: %m"),
     506             :                  (mmap_errno == ENOMEM) ?
     507             :                  errhint("This error usually means that PostgreSQL's request "
     508             :                          "for a shared memory segment exceeded available memory, "
     509             :                          "swap space, or huge pages. To reduce the request size "
     510             :                          "(currently %zu bytes), reduce PostgreSQL's shared "
     511             :                          "memory usage, perhaps by reducing shared_buffers or "
     512             :                          "max_connections.",
     513             :                          *size) : 0));  /* caller's requested size, not allocsize */
     514             :     }
     515             : 
     516           5 :     *size = allocsize;          /* tell the caller how much was actually mapped */
     517           5 :     return ptr;
     518             : }
     519             : 
     520             : /*
     521             :  * AnonymousShmemDetach --- detach from an anonymous mmap'd block
     522             :  * (called as an on_shmem_exit callback, hence funny argument list)
     523             :  */
     524             : static void
     525           5 : AnonymousShmemDetach(int status, Datum arg)
     526             : {
     527             :     /* Release anonymous shared memory block, if any; failure is only logged. */
     528           5 :     if (AnonymousShmem != NULL)
     529             :     {
     530           5 :         if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
     531           0 :             elog(LOG, "munmap(%p, %zu) failed: %m",
     532             :                  AnonymousShmem, AnonymousShmemSize);
     533           5 :         AnonymousShmem = NULL;  /* so a repeated call skips the munmap() */
     534             :     }
     535           5 : }
     536             : 
     537             : #endif                          /* USE_ANONYMOUS_SHMEM */
     538             : 
     539             : /*
     540             :  * PGSharedMemoryCreate
     541             :  *
     542             :  * Create a shared memory segment of the given size and initialize its
     543             :  * standard header.  Also, register an on_shmem_exit callback to release
     544             :  * the storage.
     545             :  *
     546             :  * Dead Postgres segments are recycled if found, but we do not fail upon
     547             :  * collision with non-Postgres shmem segments.  The idea here is to detect and
     548             :  * re-use keys that may have been assigned by a crashed postmaster or backend.
     549             :  *
     550             :  * makePrivate means to always create a new segment, rather than attach to
     551             :  * or recycle any existing segment.
     552             :  *
     553             :  * The port number is passed for possible use as a key (for SysV, we use
     554             :  * it to generate the starting shmem key).  In a standalone backend,
     555             :  * zero will be passed.
     556             :  */
     557             : PGShmemHeader *
     558           5 : PGSharedMemoryCreate(Size size, bool makePrivate, int port,
     559             :                      PGShmemHeader **shim)
     560             : {
     561             :     IpcMemoryKey NextShmemSegID;
     562             :     void       *memAddress;
     563             :     PGShmemHeader *hdr;
     564             :     IpcMemoryId shmid;
     565             :     struct stat statbuf;
     566             :     Size        sysvsize;
     567             : 
     568             :     /* Complain if hugepages demanded but we can't possibly support them */
     569             : #if !defined(USE_ANONYMOUS_SHMEM) || !defined(MAP_HUGETLB)
     570             :     if (huge_pages == HUGE_PAGES_ON)
     571             :         ereport(ERROR,
     572             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     573             :                  errmsg("huge pages not supported on this platform")));
     574             : #endif
     575             : 
     576             :     /* Room for a header? */
     577           5 :     Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
     578             : 
     579             : #ifdef USE_ANONYMOUS_SHMEM
     580           5 :     AnonymousShmem = CreateAnonymousSegment(&size);
     581           5 :     AnonymousShmemSize = size;
     582             : 
     583             :     /* Register on-exit routine to unmap the anonymous segment */
     584           5 :     on_shmem_exit(AnonymousShmemDetach, (Datum) 0);
     585             : 
     586             :     /* Now we need only allocate a minimal-sized SysV shmem block. */
     587           5 :     sysvsize = sizeof(PGShmemHeader);
     588             : #else
     589             :     sysvsize = size;
     590             : #endif
     591             : 
     592             :     /* Make sure PGSharedMemoryAttach doesn't fail without need */
     593           5 :     UsedShmemSegAddr = NULL;
     594             : 
     595             :     /* Loop till we find a free IPC key */
     596           5 :     NextShmemSegID = port * 1000;
     597             : 
     598           5 :     for (NextShmemSegID++;; NextShmemSegID++)
     599             :     {
     600             :         /* Try to create new segment */
     601           5 :         memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
     602           5 :         if (memAddress)
     603           5 :             break;              /* successful create and attach */
     604             : 
     605             :         /* Check shared memory and possibly remove and recreate */
     606             : 
     607           0 :         if (makePrivate)        /* a standalone backend shouldn't do this */
     608           0 :             continue;
     609             : 
     610           0 :         if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL)
     611           0 :             continue;           /* can't attach, not one of mine */
     612             : 
     613             :         /*
     614             :          * If I am not the creator and it belongs to an extant process,
     615             :          * continue.
     616             :          */
     617           0 :         hdr = (PGShmemHeader *) memAddress;
     618           0 :         if (hdr->creatorPID != getpid())
     619             :         {
     620           0 :             if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH)
     621             :             {
     622           0 :                 shmdt(memAddress);
     623           0 :                 continue;       /* segment belongs to a live process */
     624             :             }
     625             :         }
     626             : 
     627             :         /*
     628             :          * The segment appears to be from a dead Postgres process, or from a
     629             :          * previous cycle of life in this same process.  Zap it, if possible,
     630             :          * and any associated dynamic shared memory segments, as well. This
     631             :          * probably shouldn't fail, but if it does, assume the segment belongs
     632             :          * to someone else after all, and continue quietly.
     633             :          */
     634           0 :         if (hdr->dsm_control != 0)
     635           0 :             dsm_cleanup_using_control_segment(hdr->dsm_control);
     636           0 :         shmdt(memAddress);
     637           0 :         if (shmctl(shmid, IPC_RMID, NULL) < 0)
     638           0 :             continue;
     639             : 
     640             :         /*
     641             :          * Now try again to create the segment.
     642             :          */
     643           0 :         memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
     644           0 :         if (memAddress)
     645           0 :             break;              /* successful create and attach */
     646             : 
     647             :         /*
     648             :          * Can only get here if some other process managed to create the same
     649             :          * shmem key before we did.  Let him have that one, loop around to try
     650             :          * next key.
     651             :          */
     652           0 :     }
     653             : 
     654             :     /*
     655             :      * OK, we created a new segment.  Mark it as created by this process. The
     656             :      * order of assignments here is critical so that another Postgres process
     657             :      * can't see the header as valid but belonging to an invalid PID!
     658             :      */
     659           5 :     hdr = (PGShmemHeader *) memAddress;
     660           5 :     hdr->creatorPID = getpid();
     661           5 :     hdr->magic = PGShmemMagic;
     662           5 :     hdr->dsm_control = 0;
     663             : 
     664             :     /* Fill in the data directory ID info, too */
     665           5 :     if (stat(DataDir, &statbuf) < 0)
     666           0 :         ereport(FATAL,
     667             :                 (errcode_for_file_access(),
     668             :                  errmsg("could not stat data directory \"%s\": %m",
     669             :                         DataDir)));
     670           5 :     hdr->device = statbuf.st_dev;
     671           5 :     hdr->inode = statbuf.st_ino;
     672             : 
     673             :     /*
     674             :      * Initialize space allocation status for segment.
     675             :      */
     676           5 :     hdr->totalsize = size;
     677           5 :     hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
     678           5 :     *shim = hdr;
     679             : 
     680             :     /* Save info for possible future use */
     681           5 :     UsedShmemSegAddr = memAddress;
     682           5 :     UsedShmemSegID = (unsigned long) NextShmemSegID;
     683             : 
     684             :     /*
     685             :      * If AnonymousShmem is NULL here, then we're not using anonymous shared
     686             :      * memory, and should return a pointer to the System V shared memory
     687             :      * block. Otherwise, the System V shared memory block is only a shim, and
     688             :      * we must return a pointer to the real block.
     689             :      */
     690             : #ifdef USE_ANONYMOUS_SHMEM
     691           5 :     if (AnonymousShmem == NULL)
     692           0 :         return hdr;
     693           5 :     memcpy(AnonymousShmem, hdr, sizeof(PGShmemHeader));
     694           5 :     return (PGShmemHeader *) AnonymousShmem;
     695             : #else
     696             :     return hdr;
     697             : #endif
     698             : }
     699             : 
     700             : #ifdef EXEC_BACKEND
     701             : 
     702             : /*
     703             :  * PGSharedMemoryReAttach
     704             :  *
     705             :  * This is called during startup of a postmaster child process to re-attach to
     706             :  * an already existing shared memory segment.  This is needed only in the
     707             :  * EXEC_BACKEND case; otherwise postmaster children inherit the shared memory
     708             :  * segment attachment via fork().
     709             :  *
     710             :  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
     711             :  * routine.  The caller must have already restored them to the postmaster's
     712             :  * values.
     713             :  */
     714             : void
     715             : PGSharedMemoryReAttach(void)
     716             : {
     717             :     IpcMemoryId shmid;
     718             :     void       *hdr;
     719             :     void       *origUsedShmemSegAddr = UsedShmemSegAddr;
     720             : 
     721             :     Assert(UsedShmemSegAddr != NULL);
     722             :     Assert(IsUnderPostmaster);
     723             : 
     724             : #ifdef __CYGWIN__
     725             :     /* cygipc (currently) appears to not detach on exec. */
     726             :     PGSharedMemoryDetach();
     727             :     UsedShmemSegAddr = origUsedShmemSegAddr;
     728             : #endif
     729             : 
     730             :     elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
     731             :     hdr = (void *) PGSharedMemoryAttach((IpcMemoryKey) UsedShmemSegID, &shmid);
     732             :     if (hdr == NULL)
     733             :         elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
     734             :              (int) UsedShmemSegID, UsedShmemSegAddr);
     735             :     if (hdr != origUsedShmemSegAddr)
     736             :         elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
     737             :              hdr, origUsedShmemSegAddr);
     738             :     dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control);
     739             : 
     740             :     UsedShmemSegAddr = hdr;     /* probably redundant */
     741             : }
     742             : 
     743             : /*
     744             :  * PGSharedMemoryNoReAttach
     745             :  *
     746             :  * This is called during startup of a postmaster child process when we choose
     747             :  * *not* to re-attach to the existing shared memory segment.  We must clean up
     748             :  * to leave things in the appropriate state.  This is not used in the non
     749             :  * EXEC_BACKEND case, either.
     750             :  *
     751             :  * The child process startup logic might or might not call PGSharedMemoryDetach
     752             :  * after this; make sure that it will be a no-op if called.
     753             :  *
     754             :  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
     755             :  * routine.  The caller must have already restored them to the postmaster's
     756             :  * values.
     757             :  */
     758             : void
     759             : PGSharedMemoryNoReAttach(void)
     760             : {
     761             :     Assert(UsedShmemSegAddr != NULL);
     762             :     Assert(IsUnderPostmaster);
     763             : 
     764             : #ifdef __CYGWIN__
     765             :     /* cygipc (currently) appears to not detach on exec. */
     766             :     PGSharedMemoryDetach();
     767             : #endif
     768             : 
     769             :     /* For cleanliness, reset UsedShmemSegAddr to show we're not attached. */
     770             :     UsedShmemSegAddr = NULL;
     771             :     /* And the same for UsedShmemSegID. */
     772             :     UsedShmemSegID = 0;
     773             : }
     774             : 
     775             : #endif                          /* EXEC_BACKEND */
     776             : 
     777             : /*
     778             :  * PGSharedMemoryDetach
     779             :  *
     780             :  * Detach from the shared memory segment, if still attached.  This is not
     781             :  * intended to be called explicitly by the process that originally created the
     782             :  * segment (it will have on_shmem_exit callback(s) registered to do that).
     783             :  * Rather, this is for subprocesses that have inherited an attachment and want
     784             :  * to get rid of it.
     785             :  *
     786             :  * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
     787             :  * routine, also AnonymousShmem and AnonymousShmemSize.
     788             :  */
     789             : void
     790           1 : PGSharedMemoryDetach(void)
     791             : {
     792           1 :     if (UsedShmemSegAddr != NULL)
     793             :     {
     794           1 :         if ((shmdt(UsedShmemSegAddr) < 0)
     795             : #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
     796             :         /* Work-around for cygipc exec bug */
     797             :             && shmdt(NULL) < 0
     798             : #endif
     799             :             )
     800           0 :             elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
     801           1 :         UsedShmemSegAddr = NULL;
     802             :     }
     803             : 
     804             : #ifdef USE_ANONYMOUS_SHMEM
     805           1 :     if (AnonymousShmem != NULL)
     806             :     {
     807           1 :         if (munmap(AnonymousShmem, AnonymousShmemSize) < 0)
     808           0 :             elog(LOG, "munmap(%p, %zu) failed: %m",
     809             :                  AnonymousShmem, AnonymousShmemSize);
     810           1 :         AnonymousShmem = NULL;
     811             :     }
     812             : #endif
     813           1 : }
     814             : 
     815             : 
     816             : /*
     817             :  * Attach to shared memory and make sure it has a Postgres header
     818             :  *
     819             :  * Returns attach address if OK, else NULL
     820             :  */
     821             : static PGShmemHeader *
     822           0 : PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid)
     823             : {
     824             :     PGShmemHeader *hdr;
     825             : 
     826           0 :     if ((*shmid = shmget(key, sizeof(PGShmemHeader), 0)) < 0)
     827           0 :         return NULL;
     828             : 
     829           0 :     hdr = (PGShmemHeader *) shmat(*shmid, UsedShmemSegAddr, PG_SHMAT_FLAGS);
     830             : 
     831           0 :     if (hdr == (PGShmemHeader *) -1)
     832           0 :         return NULL;            /* failed: must be some other app's */
     833             : 
     834           0 :     if (hdr->magic != PGShmemMagic)
     835             :     {
     836           0 :         shmdt((void *) hdr);
     837           0 :         return NULL;            /* segment belongs to a non-Postgres app */
     838             :     }
     839             : 
     840           0 :     return hdr;
     841             : }

Generated by: LCOV version 1.11