LCOV - code coverage report
Current view: top level - src/backend/storage/ipc - dsm_impl.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 35 230 15.2 %
Date: 2017-09-29 15:12:54 Functions: 4 8 50.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dsm_impl.c
       4             :  *    manage dynamic shared memory segments
       5             :  *
       6             :  * This file provides low-level APIs for creating and destroying shared
       7             :  * memory segments using several different possible techniques.  We refer
       8             :  * to these segments as dynamic because they can be created, altered, and
       9             :  * destroyed at any point during the server life cycle.  This is unlike
      10             :  * the main shared memory segment, of which there is always exactly one
      11             :  * and which is always mapped at a fixed address in every PostgreSQL
      12             :  * background process.
      13             :  *
      14             :  * Because not all systems provide the same primitives in this area, nor
      15             :  * do all primitives behave the same way on all systems, we provide
      16             :  * several implementations of this facility.  Many systems implement
      17             :  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
      18             :  * in this area, with the exception that shared memory identifiers live
      19             :  * in a flat system-wide namespace, raising the uncomfortable prospect of
      20             :  * name collisions with other processes (including other copies of
      21             :  * PostgreSQL) running on the same system.  Some systems only support
      22             :  * the older System V shared memory interface (shmget etc.) which is
      23             :  * also usable; however, the default allocation limits are often quite
      24             :  * small, and the namespace is even more restricted.
      25             :  *
      26             :  * We also provide an mmap-based shared memory implementation.  This may
      27             :  * be useful on systems that provide shared memory via a special-purpose
      28             :  * filesystem; by opting for this implementation, the user can even
      29             :  * control precisely where their shared memory segments are placed.  It
      30             :  * can also be used as a fallback for systems where shm_open and shmget
      31             :  * are not available or can't be used for some reason.  Of course,
      32             :  * mapping a file residing on an actual spinning disk is a fairly poor
      33             :  * approximation for shared memory because writeback may hurt performance
      34             :  * substantially, but there should be few systems where we must make do
      35             :  * with such poor tools.
      36             :  *
      37             :  * As ever, Windows requires its own implementation.
      38             :  *
      39             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      40             :  * Portions Copyright (c) 1994, Regents of the University of California
      41             :  *
      42             :  *
      43             :  * IDENTIFICATION
      44             :  *    src/backend/storage/ipc/dsm_impl.c
      45             :  *
      46             :  *-------------------------------------------------------------------------
      47             :  */
      48             : 
      49             : #include "postgres.h"
      50             : 
      51             : #include <fcntl.h>
      52             : #include <unistd.h>
      53             : #ifndef WIN32
      54             : #include <sys/mman.h>
      55             : #endif
      56             : #include <sys/stat.h>
      57             : #ifdef HAVE_SYS_IPC_H
      58             : #include <sys/ipc.h>
      59             : #endif
      60             : #ifdef HAVE_SYS_SHM_H
      61             : #include <sys/shm.h>
      62             : #endif
      63             : #include "pgstat.h"
      64             : 
      65             : #include "portability/mem.h"
      66             : #include "storage/dsm_impl.h"
      67             : #include "storage/fd.h"
      68             : #include "utils/guc.h"
      69             : #include "utils/memutils.h"
      70             : #include "postmaster/postmaster.h"
      71             : 
      72             : #ifdef USE_DSM_POSIX
      73             : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
      74             :                void **impl_private, void **mapped_address,
      75             :                Size *mapped_size, int elevel);
      76             : #endif
      77             : #ifdef USE_DSM_SYSV
      78             : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
      79             :               void **impl_private, void **mapped_address,
      80             :               Size *mapped_size, int elevel);
      81             : #endif
      82             : #ifdef USE_DSM_WINDOWS
      83             : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
      84             :                  void **impl_private, void **mapped_address,
      85             :                  Size *mapped_size, int elevel);
      86             : #endif
      87             : #ifdef USE_DSM_MMAP
      88             : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
      89             :               void **impl_private, void **mapped_address,
      90             :               Size *mapped_size, int elevel);
      91             : #endif
      92             : static int  errcode_for_dynamic_shared_memory(void);
      93             : 
      94             : const struct config_enum_entry dynamic_shared_memory_options[] = {
      95             : #ifdef USE_DSM_POSIX
      96             :     {"posix", DSM_IMPL_POSIX, false},
      97             : #endif
      98             : #ifdef USE_DSM_SYSV
      99             :     {"sysv", DSM_IMPL_SYSV, false},
     100             : #endif
     101             : #ifdef USE_DSM_WINDOWS
     102             :     {"windows", DSM_IMPL_WINDOWS, false},
     103             : #endif
     104             : #ifdef USE_DSM_MMAP
     105             :     {"mmap", DSM_IMPL_MMAP, false},
     106             : #endif
     107             :     {"none", DSM_IMPL_NONE, false},
     108             :     {NULL, 0, false}
     109             : };
     110             : 
     111             : /* Implementation selector. */
     112             : int         dynamic_shared_memory_type;
     113             : 
     114             : /* Size of buffer to be used for zero-filling. */
     115             : #define ZBUFFER_SIZE                8192
     116             : 
     117             : #define SEGMENT_NAME_PREFIX         "Global/PostgreSQL"
     118             : 
     119             : /*------
     120             :  * Perform a low-level shared memory operation in a platform-specific way,
     121             :  * as dictated by the selected implementation.  Each implementation is
     122             :  * required to implement the following primitives.
     123             :  *
     124             :  * DSM_OP_CREATE.  Create a segment whose size is the request_size and
     125             :  * map it.
     126             :  *
     127             :  * DSM_OP_ATTACH.  Map the segment at its existing size (request_size is 0).
     128             :  * The segment may already be mapped; any existing mapping should be removed
     129             :  * before creating a new one.
     130             :  *
     131             :  * DSM_OP_DETACH.  Unmap the segment.
     132             :  *
     133             :  * DSM_OP_RESIZE.  Resize the segment to the given request_size and
     134             :  * remap the segment at that new size.
     135             :  *
     136             :  * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
     137             :  * segment.
     138             :  *
     139             :  * Arguments:
     140             :  *   op: The operation to be performed.
     141             :  *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
     142             :  *     new handle the caller wants created.
     143             :  *   request_size: For DSM_OP_CREATE, the requested size.  For DSM_OP_RESIZE,
     144             :  *     the new size.  Otherwise, 0.
     145             :  *   impl_private: Private, implementation-specific data.  Will be a pointer
     146             :  *     to NULL for the first operation on a shared memory segment within this
     147             :  *     backend; thereafter, it will point to the value to which it was set
     148             :  *     on the previous call.
     149             :  *   mapped_address: Pointer to start of current mapping; pointer to NULL
     150             :  *     if none.  Updated with new mapping address.
     151             :  *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
     152             :  *     Updated with new mapped size.
     153             :  *   elevel: Level at which to log errors.
     154             :  *
     155             :  * Return value: true on success, false on failure.  When false is returned,
     156             :  * a message should first be logged at the specified elevel, except in the
     157             :  * case where DSM_OP_CREATE experiences a name collision, which should
     158             :  * silently return false.
     159             :  *-----
     160             :  */
     161             : bool
     162         382 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
     163             :             void **impl_private, void **mapped_address, Size *mapped_size,
     164             :             int elevel)
     165             : {
     166         382 :     Assert(op == DSM_OP_CREATE || op == DSM_OP_RESIZE || request_size == 0);
     167         382 :     Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
     168             :            (*mapped_address == NULL && *mapped_size == 0));
     169             : 
     170         382 :     switch (dynamic_shared_memory_type)
     171             :     {
     172             : #ifdef USE_DSM_POSIX
     173             :         case DSM_IMPL_POSIX:
     174         382 :             return dsm_impl_posix(op, handle, request_size, impl_private,
     175             :                                   mapped_address, mapped_size, elevel);
     176             : #endif
     177             : #ifdef USE_DSM_SYSV
     178             :         case DSM_IMPL_SYSV:
     179           0 :             return dsm_impl_sysv(op, handle, request_size, impl_private,
     180             :                                  mapped_address, mapped_size, elevel);
     181             : #endif
     182             : #ifdef USE_DSM_WINDOWS
     183             :         case DSM_IMPL_WINDOWS:
     184             :             return dsm_impl_windows(op, handle, request_size, impl_private,
     185             :                                     mapped_address, mapped_size, elevel);
     186             : #endif
     187             : #ifdef USE_DSM_MMAP
     188             :         case DSM_IMPL_MMAP:
     189           0 :             return dsm_impl_mmap(op, handle, request_size, impl_private,
     190             :                                  mapped_address, mapped_size, elevel);
     191             : #endif
     192             :         default:
     193           0 :             elog(ERROR, "unexpected dynamic shared memory type: %d",
     194             :                  dynamic_shared_memory_type);
     195             :             return false;
     196             :     }
     197             : }
     198             : 
     199             : /*
     200             :  * Does the current dynamic shared memory implementation support resizing
     201             :  * segments?  (The answer here could be platform-dependent in the future,
     202             :  * since AIX allows shmctl(shmid, SHM_RESIZE, &buffer), though you apparently
     203             :  * can't resize segments to anything larger than 256MB that way.  For now,
     204             :  * we keep it simple.)
     205             :  */
     206             : bool
     207           0 : dsm_impl_can_resize(void)
     208             : {
     209           0 :     switch (dynamic_shared_memory_type)
     210             :     {
     211             :         case DSM_IMPL_NONE:
     212           0 :             return false;
     213             :         case DSM_IMPL_POSIX:
     214           0 :             return true;
     215             :         case DSM_IMPL_SYSV:
     216           0 :             return false;
     217             :         case DSM_IMPL_WINDOWS:
     218           0 :             return false;
     219             :         case DSM_IMPL_MMAP:
     220           0 :             return true;
     221             :         default:
     222           0 :             return false;       /* should not happen */
     223             :     }
     224             : }
     225             : 
     226             : #ifdef USE_DSM_POSIX
     227             : /*
     228             :  * Operating system primitives to support POSIX shared memory.
     229             :  *
     230             :  * POSIX shared memory segments are created and attached using shm_open()
     231             :  * and shm_unlink(); other operations, such as sizing or mapping the
     232             :  * segment, are performed as if the shared memory segments were files.
     233             :  *
     234             :  * Indeed, on some platforms, they may be implemented that way.  While
     235             :  * POSIX shared memory segments seem intended to exist in a flat namespace,
     236             :  * some operating systems may implement them as files, even going so far as
     237             :  * to treat a request for /xyz as a request to create a file by that name
     238             :  * in the root directory.  Users of such broken platforms should select
     239             :  * a different shared memory implementation.
     240             :  */
     241             : static bool
     242         382 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
     243             :                void **impl_private, void **mapped_address, Size *mapped_size,
     244             :                int elevel)
     245             : {
     246             :     char        name[64];
     247             :     int         flags;
     248             :     int         fd;
     249             :     char       *address;
     250             : 
     251         382 :     snprintf(name, 64, "/PostgreSQL.%u", handle);
     252             : 
     253             :     /* Handle teardown cases. */
     254         382 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     255             :     {
     256         201 :         if (*mapped_address != NULL
     257         182 :             && munmap(*mapped_address, *mapped_size) != 0)
     258             :         {
     259           0 :             ereport(elevel,
     260             :                     (errcode_for_dynamic_shared_memory(),
     261             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     262             :                             name)));
     263           0 :             return false;
     264             :         }
     265         201 :         *mapped_address = NULL;
     266         201 :         *mapped_size = 0;
     267         201 :         if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
     268             :         {
     269           0 :             ereport(elevel,
     270             :                     (errcode_for_dynamic_shared_memory(),
     271             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     272             :                             name)));
     273           0 :             return false;
     274             :         }
     275         201 :         return true;
     276             :     }
     277             : 
     278             :     /*
     279             :      * Create new segment or open an existing one for attach or resize.
     280             :      *
     281             :      * Even though we're not going through fd.c, we should be safe against
     282             :      * running out of file descriptors, because of NUM_RESERVED_FDS.  We're
     283             :      * only opening one extra descriptor here, and we'll close it before
     284             :      * returning.
     285             :      */
     286         181 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     287         181 :     if ((fd = shm_open(name, flags, 0600)) == -1)
     288             :     {
     289           0 :         if (errno != EEXIST)
     290           0 :             ereport(elevel,
     291             :                     (errcode_for_dynamic_shared_memory(),
     292             :                      errmsg("could not open shared memory segment \"%s\": %m",
     293             :                             name)));
     294           0 :         return false;
     295             :     }
     296             : 
     297             :     /*
     298             :      * If we're attaching the segment, determine the current size; if we are
     299             :      * creating or resizing the segment, set the size to the requested value.
     300             :      */
     301         181 :     if (op == DSM_OP_ATTACH)
     302             :     {
     303             :         struct stat st;
     304             : 
     305         159 :         if (fstat(fd, &st) != 0)
     306             :         {
     307             :             int         save_errno;
     308             : 
     309             :             /* Back out what's already been done. */
     310           0 :             save_errno = errno;
     311           0 :             close(fd);
     312           0 :             errno = save_errno;
     313             : 
     314           0 :             ereport(elevel,
     315             :                     (errcode_for_dynamic_shared_memory(),
     316             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     317             :                             name)));
     318           0 :             return false;
     319             :         }
     320         159 :         request_size = st.st_size;
     321             :     }
     322          22 :     else if (*mapped_size != request_size && ftruncate(fd, request_size))
     323             :     {
     324             :         int         save_errno;
     325             : 
     326             :         /* Back out what's already been done. */
     327           0 :         save_errno = errno;
     328           0 :         close(fd);
     329           0 :         if (op == DSM_OP_CREATE)
     330           0 :             shm_unlink(name);
     331           0 :         errno = save_errno;
     332             : 
     333           0 :         ereport(elevel,
     334             :                 (errcode_for_dynamic_shared_memory(),
     335             :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     336             :                         name, request_size)));
     337           0 :         return false;
     338             :     }
     339             : 
     340             :     /*
     341             :      * If we're reattaching or resizing, we must remove any existing mapping,
     342             :      * unless we've already got the right thing mapped.
     343             :      */
     344         181 :     if (*mapped_address != NULL)
     345             :     {
     346           0 :         if (*mapped_size == request_size)
     347           0 :             return true;
     348           0 :         if (munmap(*mapped_address, *mapped_size) != 0)
     349             :         {
     350             :             int         save_errno;
     351             : 
     352             :             /* Back out what's already been done. */
     353           0 :             save_errno = errno;
     354           0 :             close(fd);
     355           0 :             if (op == DSM_OP_CREATE)
     356           0 :                 shm_unlink(name);
     357           0 :             errno = save_errno;
     358             : 
     359           0 :             ereport(elevel,
     360             :                     (errcode_for_dynamic_shared_memory(),
     361             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     362             :                             name)));
     363           0 :             return false;
     364             :         }
     365           0 :         *mapped_address = NULL;
     366           0 :         *mapped_size = 0;
     367             :     }
     368             : 
     369             :     /* Map it. */
     370         181 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     371             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     372         181 :     if (address == MAP_FAILED)
     373             :     {
     374             :         int         save_errno;
     375             : 
     376             :         /* Back out what's already been done. */
     377           0 :         save_errno = errno;
     378           0 :         close(fd);
     379           0 :         if (op == DSM_OP_CREATE)
     380           0 :             shm_unlink(name);
     381           0 :         errno = save_errno;
     382             : 
     383           0 :         ereport(elevel,
     384             :                 (errcode_for_dynamic_shared_memory(),
     385             :                  errmsg("could not map shared memory segment \"%s\": %m",
     386             :                         name)));
     387           0 :         return false;
     388             :     }
     389         181 :     *mapped_address = address;
     390         181 :     *mapped_size = request_size;
     391         181 :     close(fd);
     392             : 
     393         181 :     return true;
     394             : }
     395             : #endif
     396             : 
     397             : #ifdef USE_DSM_SYSV
     398             : /*
     399             :  * Operating system primitives to support System V shared memory.
     400             :  *
     401             :  * System V shared memory segments are manipulated using shmget(), shmat(),
     402             :  * shmdt(), and shmctl().  There's no portable way to resize such
     403             :  * segments.  As the default allocation limits for System V shared memory
     404             :  * are usually quite low, the POSIX facilities may be preferable; but
     405             :  * those are not supported everywhere.
     406             :  */
     407             : static bool
     408           0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
     409             :               void **impl_private, void **mapped_address, Size *mapped_size,
     410             :               int elevel)
     411             : {
     412             :     key_t       key;
     413             :     int         ident;
     414             :     char       *address;
     415             :     char        name[64];
     416             :     int        *ident_cache;
     417             : 
     418             :     /* Resize is not supported for System V shared memory. */
     419           0 :     if (op == DSM_OP_RESIZE)
     420             :     {
     421           0 :         elog(elevel, "System V shared memory segments cannot be resized");
     422           0 :         return false;
     423             :     }
     424             : 
     425             :     /* Since resize isn't supported, reattach is a no-op. */
     426           0 :     if (op == DSM_OP_ATTACH && *mapped_address != NULL)
     427           0 :         return true;
     428             : 
     429             :     /*
     430             :      * POSIX shared memory and mmap-based shared memory identify segments with
     431             :      * names.  To avoid needless error message variation, we use the handle as
     432             :      * the name.
     433             :      */
     434           0 :     snprintf(name, 64, "%u", handle);
     435             : 
     436             :     /*
     437             :      * The System V shared memory namespace is very restricted; names are of
     438             :      * type key_t, which is expected to be some sort of integer data type, but
     439             :      * not necessarily the same one as dsm_handle.  Since we use dsm_handle to
     440             :      * identify shared memory segments across processes, this might seem like
     441             :      * a problem, but it's really not.  If dsm_handle is bigger than key_t,
     442             :      * the cast below might truncate away some bits from the handle the
     443             :      * user provided, but it'll truncate exactly the same bits away in exactly
     444             :      * the same fashion every time we use that handle, which is all that
     445             :      * really matters.  Conversely, if dsm_handle is smaller than key_t, we
     446             :      * won't use the full range of available key space, but that's no big deal
     447             :      * either.
     448             :      *
     449             :      * We do make sure that the key isn't negative, because that might not be
     450             :      * portable.
     451             :      */
     452           0 :     key = (key_t) handle;
     453           0 :     if (key < 1)             /* avoid compiler warning if type is unsigned */
     454           0 :         key = -key;
     455             : 
     456             :     /*
     457             :      * There's one special key, IPC_PRIVATE, which can't be used.  If we end
     458             :      * up with that value by chance during a create operation, just pretend it
     459             :      * already exists, so that caller will retry.  If we run into it anywhere
     460             :      * else, the caller has passed a handle that doesn't correspond to
     461             :      * anything we ever created, which should not happen.
     462             :      */
     463           0 :     if (key == IPC_PRIVATE)
     464             :     {
     465           0 :         if (op != DSM_OP_CREATE)
     466           0 :             elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
     467           0 :         errno = EEXIST;
     468           0 :         return false;
     469             :     }
     470             : 
     471             :     /*
     472             :      * Before we can do anything with a shared memory segment, we have to map
     473             :      * the shared memory key to a shared memory identifier using shmget(). To
     474             :      * avoid repeated lookups, we store the identifier using impl_private.
     475             :      */
     476           0 :     if (*impl_private != NULL)
     477             :     {
     478           0 :         ident_cache = *impl_private;
     479           0 :         ident = *ident_cache;
     480             :     }
     481             :     else
     482             :     {
     483           0 :         int         flags = IPCProtection;
     484             :         size_t      segsize;
     485             : 
     486             :         /*
     487             :          * Allocate the memory BEFORE acquiring the resource, so that we don't
     488             :          * leak the resource if memory allocation fails.
     489             :          */
     490           0 :         ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
     491             : 
     492             :         /*
     493             :          * When using shmget to find an existing segment, we must pass the
     494             :          * size as 0.  Passing a non-zero size which is greater than the
     495             :          * actual size will result in EINVAL.
     496             :          */
     497           0 :         segsize = 0;
     498             : 
     499           0 :         if (op == DSM_OP_CREATE)
     500             :         {
     501           0 :             flags |= IPC_CREAT | IPC_EXCL;
     502           0 :             segsize = request_size;
     503             :         }
     504             : 
     505           0 :         if ((ident = shmget(key, segsize, flags)) == -1)
     506             :         {
     507           0 :             if (errno != EEXIST)
     508             :             {
     509           0 :                 int         save_errno = errno;
     510             : 
     511           0 :                 pfree(ident_cache);
     512           0 :                 errno = save_errno;
     513           0 :                 ereport(elevel,
     514             :                         (errcode_for_dynamic_shared_memory(),
     515             :                          errmsg("could not get shared memory segment: %m")));
     516             :             }
     517           0 :             return false;
     518             :         }
     519             : 
     520           0 :         *ident_cache = ident;
     521           0 :         *impl_private = ident_cache;
     522             :     }
     523             : 
     524             :     /* Handle teardown cases. */
     525           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     526             :     {
     527           0 :         pfree(ident_cache);
     528           0 :         *impl_private = NULL;
     529           0 :         if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
     530             :         {
     531           0 :             ereport(elevel,
     532             :                     (errcode_for_dynamic_shared_memory(),
     533             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     534             :                             name)));
     535           0 :             return false;
     536             :         }
     537           0 :         *mapped_address = NULL;
     538           0 :         *mapped_size = 0;
     539           0 :         if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
     540             :         {
     541           0 :             ereport(elevel,
     542             :                     (errcode_for_dynamic_shared_memory(),
     543             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     544             :                             name)));
     545           0 :             return false;
     546             :         }
     547           0 :         return true;
     548             :     }
     549             : 
     550             :     /* If we're attaching it, we must use IPC_STAT to determine the size. */
     551           0 :     if (op == DSM_OP_ATTACH)
     552             :     {
     553             :         struct shmid_ds shm;
     554             : 
     555           0 :         if (shmctl(ident, IPC_STAT, &shm) != 0)
     556             :         {
     557           0 :             ereport(elevel,
     558             :                     (errcode_for_dynamic_shared_memory(),
     559             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     560             :                             name)));
     561           0 :             return false;
     562             :         }
     563           0 :         request_size = shm.shm_segsz;
     564             :     }
     565             : 
     566             :     /* Map it. */
     567           0 :     address = shmat(ident, NULL, PG_SHMAT_FLAGS);
     568           0 :     if (address == (void *) -1)
     569             :     {
     570             :         int         save_errno;
     571             : 
     572             :         /* Back out what's already been done. */
     573           0 :         save_errno = errno;
     574           0 :         if (op == DSM_OP_CREATE)
     575           0 :             shmctl(ident, IPC_RMID, NULL);
     576           0 :         errno = save_errno;
     577             : 
     578           0 :         ereport(elevel,
     579             :                 (errcode_for_dynamic_shared_memory(),
     580             :                  errmsg("could not map shared memory segment \"%s\": %m",
     581             :                         name)));
     582           0 :         return false;
     583             :     }
     584           0 :     *mapped_address = address;
     585           0 :     *mapped_size = request_size;
     586             : 
     587           0 :     return true;
     588             : }
     589             : #endif
     590             : 
     591             : #ifdef USE_DSM_WINDOWS
     592             : /*
     593             :  * Operating system primitives to support Windows shared memory.
     594             :  *
     595             :  * Segments are named file mapping objects backed by the system paging file
     596             :  * rather than a physical file, as is done for the main shared memory
     597             :  * segment on Windows.  A mapping object is a kernel object: it is deleted
     598             :  * when the last reference goes away, either explicitly via CloseHandle or
     599             :  * when the process holding the reference exits.
     600             :  *
     601             :  * On create/attach, *impl_private receives the mapping HANDLE and
     602             :  * *mapped_address / *mapped_size describe the view.  Returns true on success,
     603             :  * false on failure (reported at elevel, except a name collision on create).
     604             :  */
     605             : static bool
     606             : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
     607             :                  void **impl_private, void **mapped_address,
     608             :                  Size *mapped_size, int elevel)
     609             : {
     610             :     char       *address;
     611             :     HANDLE      hmap;
     612             :     char        name[64];
     613             :     MEMORY_BASIC_INFORMATION info;
     614             : 
     615             :     /* Resize is not supported for Windows shared memory. */
     616             :     if (op == DSM_OP_RESIZE)
     617             :     {
     618             :         elog(elevel, "Windows shared memory segments cannot be resized");
     619             :         return false;
     620             :     }
     621             : 
     622             :     /* Since resize isn't supported, reattach is a no-op. */
     623             :     if (op == DSM_OP_ATTACH && *mapped_address != NULL)
     624             :         return true;
     625             : 
     626             :     /*
     627             :      * Storing the shared memory segment in the Global\ namespace would allow
     628             :      * any process running in any session to access that file mapping object,
     629             :      * provided that the caller has the required access rights.  But to avoid
     630             :      * the issues faced in main shared memory, we use a naming convention
     631             :      * similar to main shared memory.  We can change this once the issue
     632             :      * mentioned in GetSharedMemName is resolved.
     633             :      */
     634             :     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     635             : 
     636             :     /*
     637             :      * Handle teardown cases.  Since Windows automatically destroys the object
     638             :      * when no references remain, we can treat destroy the same as detach.
     639             :      */
     640             :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     641             :     {
     642             :         if (*mapped_address != NULL
     643             :             && UnmapViewOfFile(*mapped_address) == 0)
     644             :         {
     645             :             _dosmaperr(GetLastError());
     646             :             ereport(elevel,
     647             :                     (errcode_for_dynamic_shared_memory(),
     648             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     649             :                             name)));
     650             :             return false;
     651             :         }
     652             :         if (*impl_private != NULL
     653             :             && CloseHandle(*impl_private) == 0)
     654             :         {
     655             :             _dosmaperr(GetLastError());
     656             :             ereport(elevel,
     657             :                     (errcode_for_dynamic_shared_memory(),
     658             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     659             :                             name)));
     660             :             return false;
     661             :         }
     662             : 
     663             :         *impl_private = NULL;
     664             :         *mapped_address = NULL;
     665             :         *mapped_size = 0;
     666             :         return true;
     667             :     }
     668             : 
     669             :     /* Create new segment or open an existing one for attach. */
     670             :     if (op == DSM_OP_CREATE)
     671             :     {
     672             :         DWORD       size_high;
     673             :         DWORD       size_low;
     674             :         DWORD       errcode;
     675             : 
     676             :         /* Shifts >= the width of the type are undefined. */
     677             : #ifdef _WIN64
     678             :         size_high = request_size >> 32;
     679             : #else
     680             :         size_high = 0;
     681             : #endif
     682             :         size_low = (DWORD) request_size;
     683             : 
     684             :         /* CreateFileMapping might not clear the error code on success */
     685             :         SetLastError(0);
     686             : 
     687             :         hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
     688             :                                  NULL,  /* Default security attrs */
     689             :                                  PAGE_READWRITE,    /* Memory is read/write */
     690             :                                  size_high, /* Upper 32 bits of size */
     691             :                                  size_low,  /* Lower 32 bits of size */
     692             :                                  name);
     693             : 
     694             :         errcode = GetLastError();
     695             :         if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
     696             :         {
     697             :             /*
     698             :              * On Windows, when the segment already exists, a handle for the
     699             :              * existing segment is returned.  We must close it before
     700             :              * returning.  However, if the existing segment was created by a
     701             :              * service, then it returns ERROR_ACCESS_DENIED.  We don't do
     702             :              * _dosmaperr here, so errno won't be modified.
     703             :              */
     704             :             if (hmap)
     705             :                 CloseHandle(hmap);
     706             :             return false;
     707             :         }
     708             : 
     709             :         if (!hmap)
     710             :         {
     711             :             _dosmaperr(errcode);
     712             :             ereport(elevel,
     713             :                     (errcode_for_dynamic_shared_memory(),
     714             :                      errmsg("could not create shared memory segment \"%s\": %m",
     715             :                             name)));
     716             :             return false;
     717             :         }
     718             :     }
     719             :     else
     720             :     {
     721             :         hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
     722             :                                FALSE,   /* do not inherit the handle */
     723             :                                name);   /* name of mapping object */
     724             :         if (!hmap)
     725             :         {
     726             :             _dosmaperr(GetLastError());
     727             :             ereport(elevel,
     728             :                     (errcode_for_dynamic_shared_memory(),
     729             :                      errmsg("could not open shared memory segment \"%s\": %m",
     730             :                             name)));
     731             :             return false;
     732             :         }
     733             :     }
     734             : 
     735             :     /* Map it. */
     736             :     address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
     737             :                             0, 0, 0);
     738             :     if (!address)
     739             :     {
     740             :         int         save_errno;
     741             : 
     742             :         _dosmaperr(GetLastError());
     743             :         /* Back out what's already been done. */
     744             :         save_errno = errno;
     745             :         CloseHandle(hmap);
     746             :         errno = save_errno;
     747             : 
     748             :         ereport(elevel,
     749             :                 (errcode_for_dynamic_shared_memory(),
     750             :                  errmsg("could not map shared memory segment \"%s\": %m",
     751             :                         name)));
     752             :         return false;
     753             :     }
     754             : 
     755             :     /*
     756             :      * VirtualQuery gives the size in page_size units, which is 4K for
     757             :      * Windows.  We need the size only when we are attaching, but we also
     758             :      * query it when creating a new segment, to keep the reported size
     759             :      * consistent for both DSM_OP_CREATE and DSM_OP_ATTACH.
     760             :      */
     761             :     if (VirtualQuery(address, &info, sizeof(info)) == 0)
     762             :     {
     763             :         int         save_errno;
     764             : 
     765             :         _dosmaperr(GetLastError());
     766             :         /* Back out what's already been done. */
     767             :         save_errno = errno;
     768             :         UnmapViewOfFile(address);
     769             :         CloseHandle(hmap);
     770             :         errno = save_errno;
     771             : 
     772             :         ereport(elevel,
     773             :                 (errcode_for_dynamic_shared_memory(),
     774             :                  errmsg("could not stat shared memory segment \"%s\": %m",
     775             :                         name)));
     776             :         return false;
     777             :     }
     778             : 
     779             :     *mapped_address = address;
     780             :     *mapped_size = info.RegionSize;
     781             :     *impl_private = hmap;
     782             : 
     783             :     return true;
     784             : }
     785             : #endif
     786             : 
     787             : #ifdef USE_DSM_MMAP
     788             : /*
     789             :  * Operating system primitives to support mmap-based shared memory.
     790             :  *
     791             :  * "Shared memory" is something of a misnomer: each segment is a file under
     792             :  * PG_DYNSHMEM_DIR mapped into our address space, so the OS may sync it to
     793             :  * disk even when nothing is paged out.  Relocating that directory to a
     794             :  * ramdisk avoids this.  New file space is zero-filled so it is really
     795             :  * allocated.  Returns true on success; on failure returns false after
     796             :  * reporting at elevel (except a name collision on create, which is silent).
     797             :  */
     798             : static bool
     799           0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
     800             :               void **impl_private, void **mapped_address, Size *mapped_size,
     801             :               int elevel)
     802             : {
     803             :     char        name[64];
     804             :     int         flags;
     805             :     int         fd;
     806             :     char       *address;
     807             : 
     808           0 :     snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
     809             :              handle);
     810             : 
     811             :     /* Handle teardown cases. */
     812           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     813             :     {
     814           0 :         if (*mapped_address != NULL
     815           0 :             && munmap(*mapped_address, *mapped_size) != 0)
     816             :         {
     817           0 :             ereport(elevel,
     818             :                     (errcode_for_dynamic_shared_memory(),
     819             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     820             :                             name)));
     821           0 :             return false;
     822             :         }
     823           0 :         *mapped_address = NULL;
     824           0 :         *mapped_size = 0;
     825           0 :         if (op == DSM_OP_DESTROY && unlink(name) != 0)
     826             :         {
     827           0 :             ereport(elevel,
     828             :                     (errcode_for_dynamic_shared_memory(),
     829             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     830             :                             name)));
     831           0 :             return false;
     832             :         }
     833           0 :         return true;
     834             :     }
     835             : 
     836             :     /* Create new segment or open an existing one for attach or resize. */
     837           0 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     838           0 :     if ((fd = OpenTransientFile(name, flags, 0600)) == -1)
     839             :     {
     840           0 :         if (errno != EEXIST)
     841           0 :             ereport(elevel,
     842             :                     (errcode_for_dynamic_shared_memory(),
     843             :                      errmsg("could not open shared memory segment \"%s\": %m",
     844             :                             name)));
     845           0 :         return false;
     846             :     }
     847             : 
     848             :     /*
     849             :      * If we're attaching the segment, determine the current size; if we are
     850             :      * creating or resizing the segment, set the size to the requested value.
     851             :      */
     852           0 :     if (op == DSM_OP_ATTACH)
     853             :     {
     854             :         struct stat st;
     855             : 
     856           0 :         if (fstat(fd, &st) != 0)
     857             :         {
     858             :             int         save_errno;
     859             : 
     860             :             /* Back out what's already been done. */
     861           0 :             save_errno = errno;
     862           0 :             CloseTransientFile(fd);
     863           0 :             errno = save_errno;
     864             : 
     865           0 :             ereport(elevel,
     866             :                     (errcode_for_dynamic_shared_memory(),
     867             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     868             :                             name)));
     869           0 :             return false;
     870             :         }
     871           0 :         request_size = st.st_size;
     872             :     }
     873           0 :     else if (*mapped_size > request_size && ftruncate(fd, request_size))
     874             :     {
     875             :         int         save_errno;
     876             : 
     877             :         /* Back out; fd came from OpenTransientFile, so close it to match. */
     878           0 :         save_errno = errno;
     879           0 :         CloseTransientFile(fd);
     880           0 :         if (op == DSM_OP_CREATE)
     881           0 :             unlink(name);
     882           0 :         errno = save_errno;
     883             : 
     884           0 :         ereport(elevel,
     885             :                 (errcode_for_dynamic_shared_memory(),
     886             :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     887             :                         name, request_size)));
     888           0 :         return false;
     889             :     }
     890           0 :     else if (*mapped_size < request_size)
     891             :     {
     892             :         /*
     893             :          * Allocate a buffer full of zeros.  "remaining" must be as wide as
     894             :          * request_size (Size), or requests of 4GB or more would be truncated.
     895             :          *
     896             :          * Note: zbuffer is palloc'd rather than a local char array so that it
     897             :          * is reasonably well-aligned; this may save a few cycles in the kernel.
     898             :          */
     899           0 :         char       *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
     900           0 :         Size        remaining = request_size;
     901           0 :         bool        success = true;
     902             : 
     903             :         /*
     904             :          * Zero-fill the file the hard way, so that all the file space has
     905             :          * really been allocated and we don't later seg fault when accessing
     906             :          * the mapping.  NOTE(review): writing starts at the current offset
     907             :          * of the freshly opened fd -- confirm this is safe when resizing.
     908             :          */
     909           0 :         while (success && remaining > 0)
     910             :         {
     911           0 :             Size        goal = remaining;
     912             : 
     913           0 :             if (goal > ZBUFFER_SIZE)
     914           0 :                 goal = ZBUFFER_SIZE;
     915           0 :             pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     916           0 :             if (write(fd, zbuffer, goal) == goal)
     917           0 :                 remaining -= goal;
     918             :             else
     919           0 :                 success = false;
     920           0 :             pgstat_report_wait_end();
     921             :         }
     922             : 
     923           0 :         if (!success)
     924             :         {
     925             :             int         save_errno;
     926             : 
     927             :             /* Back out what's already been done. */
     928           0 :             save_errno = errno;
     929           0 :             CloseTransientFile(fd);
     930           0 :             if (op == DSM_OP_CREATE)
     931           0 :                 unlink(name);
     932           0 :             errno = save_errno ? save_errno : ENOSPC;
     933             : 
     934           0 :             ereport(elevel,
     935             :                     (errcode_for_dynamic_shared_memory(),
     936             :                      errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     937             :                             name, request_size)));
     938           0 :             return false;
     939             :         }
     940             :     }
     941             : 
     942             :     /* Reattach/resize: drop any old mapping unless it is already right. */
     943           0 :     if (*mapped_address != NULL)
     944             :     {
     945           0 :         if (*mapped_size == request_size)
     946             :         {
     947           0 :             CloseTransientFile(fd); /* nothing to remap; don't leak fd */
     948           0 :             return true;
     949             :         }
     950           0 :         if (munmap(*mapped_address, *mapped_size) != 0)
     951             :         {
     952             :             int         save_errno;
     953             : 
     954             :             /* Back out what's already been done. */
     955           0 :             save_errno = errno;
     956           0 :             CloseTransientFile(fd);
     957           0 :             if (op == DSM_OP_CREATE)
     958           0 :                 unlink(name);
     959           0 :             errno = save_errno;
     960             : 
     961           0 :             ereport(elevel,
     962             :                     (errcode_for_dynamic_shared_memory(),
     963             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     964             :                             name)));
     965           0 :             return false;
     966             :         }
     967           0 :         *mapped_address = NULL;
     968           0 :         *mapped_size = 0;
     969             :     }
     970             : 
     971             :     /* Map it. */
     972           0 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     973             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     974           0 :     if (address == MAP_FAILED)
     975             :     {
     976             :         int         save_errno;
     977             : 
     978             :         /* Back out what's already been done. */
     979           0 :         save_errno = errno;
     980           0 :         CloseTransientFile(fd);
     981           0 :         if (op == DSM_OP_CREATE)
     982           0 :             unlink(name);
     983           0 :         errno = save_errno;
     984             : 
     985           0 :         ereport(elevel,
     986             :                 (errcode_for_dynamic_shared_memory(),
     987             :                  errmsg("could not map shared memory segment \"%s\": %m",
     988             :                         name)));
     989           0 :         return false;
     990             :     }
     991           0 :     *mapped_address = address;
     992           0 :     *mapped_size = request_size;
     993           0 :     CloseTransientFile(fd);
     994             : 
     995           0 :     return true;
     996             : }
     997             : #endif
     998             : 
     999             : /*
    1000             :  * Implementation-specific actions that must be performed when a segment is to
    1001             :  * be preserved even when no backend has it attached.
    1002             :  *
    1003             :  * Except on Windows, this does nothing.  Because Windows cleans up segments
    1004             :  * once no references remain, we duplicate the segment handle (impl_private)
    1005             :  * into the postmaster process, which needn't do anything to receive it.  The
    1006             :  * duplicate is stored in *impl_private_pm_handle; failure raises ERROR.
    1007             :  */
    1008             : void
    1009           2 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
    1010             :                      void **impl_private_pm_handle)
    1011             : {
    1012           2 :     switch (dynamic_shared_memory_type)
    1013             :     {
    1014             : #ifdef USE_DSM_WINDOWS
    1015             :         case DSM_IMPL_WINDOWS:
    1016             :             {
    1017             :                 HANDLE      hmap;
    1018             : 
    1019             :                 if (!DuplicateHandle(GetCurrentProcess(), impl_private,
    1020             :                                      PostmasterHandle, &hmap, 0, FALSE,
    1021             :                                      DUPLICATE_SAME_ACCESS))
    1022             :                 {
    1023             :                     char        name[64];
    1024             : 
    1025             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1026             :                     _dosmaperr(GetLastError());
    1027             :                     ereport(ERROR,
    1028             :                             (errcode_for_dynamic_shared_memory(),
    1029             :                              errmsg("could not duplicate handle for \"%s\": %m",
    1030             :                                     name)));
    1031             :                 }
    1032             : 
    1033             :                 /*
    1034             :                  * Here, we remember the handle that we created in the
    1035             :                  * postmaster process.  This handle isn't actually usable in
    1036             :                  * any process other than the postmaster, but that doesn't
    1037             :                  * matter.  We're just holding onto it so that, if the segment
    1038             :                  * is unpinned, dsm_impl_unpin_segment can close it.
    1039             :                  */
    1040             :                 *impl_private_pm_handle = hmap;
    1041             :                 break;
    1042             :             }
    1043             : #endif
    1044             :         default:
    1045           2 :             break;
    1046             :     }
    1047           2 : }
    1048             : 
    1049             : /*
    1050             :  * Implementation-specific actions needed when a segment is no longer to be
    1051             :  * preserved, so that it is cleaned up once all backends have detached.
    1052             :  *
    1053             :  * Except on Windows, this does nothing.  On Windows, *impl_private is the
    1054             :  * handle dsm_impl_pin_segment created in the postmaster's process space; if
    1055             :  * set, it is closed there (DuplicateHandle with DUPLICATE_CLOSE_SOURCE) and
    1056             :  * *impl_private is reset to NULL.  Failure raises ERROR.
    1057             :  */
    1058             : void
    1059           2 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
    1060             : {
    1061           2 :     switch (dynamic_shared_memory_type)
    1062             :     {
    1063             : #ifdef USE_DSM_WINDOWS
    1064             :         case DSM_IMPL_WINDOWS:
    1065             :             {
    1066             :                 if (*impl_private &&
    1067             :                     !DuplicateHandle(PostmasterHandle, *impl_private,
    1068             :                                      NULL, NULL, 0, FALSE,
    1069             :                                      DUPLICATE_CLOSE_SOURCE))
    1070             :                 {
    1071             :                     char        name[64];
    1072             : 
    1073             :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1074             :                     _dosmaperr(GetLastError());
    1075             :                     ereport(ERROR,
    1076             :                             (errcode_for_dynamic_shared_memory(),
    1077             :                              errmsg("could not duplicate handle for \"%s\": %m",
    1078             :                                     name)));
    1079             :                 }
    1080             : 
    1081             :                 *impl_private = NULL;
    1082             :                 break;
    1083             :             }
    1084             : #endif
    1085             :         default:
    1086           2 :             break;
    1087             :     }
    1088           2 : }
    1089             : 
    1090             : static int
    1091           0 : errcode_for_dynamic_shared_memory(void)
    1092             : {
    1093             :     /* EFBIG and ENOMEM report as out-of-memory; else use file-access code. */
    1094           0 :     if (errno != EFBIG && errno != ENOMEM)
    1095           0 :         return errcode_for_file_access();
    1096           0 :     return errcode(ERRCODE_OUT_OF_MEMORY);
    1097             : }

Generated by: LCOV version 1.11