Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * File-processing utility routines.
4 : *
5 : * Assorted utility functions to work on files.
6 : *
7 : *
8 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
9 : * Portions Copyright (c) 1994, Regents of the University of California
10 : *
11 : * src/common/file_utils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres_fe.h"
16 :
17 : #include <dirent.h>
18 : #include <fcntl.h>
19 : #include <sys/stat.h>
20 : #include <unistd.h>
21 :
22 : #include "common/file_utils.h"
23 :
24 :
25 : /*
 * Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data:
 * either sync_file_range(), or posix_fadvise() with POSIX_FADV_DONTNEED.
 * pre_sync_fname() and the pre-sync passes that use it are compiled only
 * when this symbol is defined.
 */
26 : #if defined(HAVE_SYNC_FILE_RANGE)
27 : #define PG_FLUSH_DATA_WORKS 1
28 : #elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
29 : #define PG_FLUSH_DATA_WORKS 1
30 : #endif
31 :
32 : /*
33 : * pg_xlog has been renamed to pg_wal in version 10.
 *
 * fsync_pgdata() compares its serverVersion argument against this value:
 * anything lower selects "pg_xlog", everything else selects "pg_wal".
34 : */
35 : #define MINIMUM_VERSION_FOR_PG_WAL 100000
36 :
/* Forward declarations of the file-local helpers defined further down. */
37 : #ifdef PG_FLUSH_DATA_WORKS
38 : static int pre_sync_fname(const char *fname, bool isdir,
39 : const char *progname);
40 : #endif
41 : static void walkdir(const char *path,
42 : int (*action) (const char *fname, bool isdir, const char *progname),
43 : bool process_symlinks, const char *progname);
44 :
45 : /*
46 : * Issue fsync recursively on PGDATA and all its contents.
47 : *
48 : * We fsync regular files and directories wherever they are, but we follow
49 : * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
50 : * Other symlinks are presumed to point at files we're not responsible for
51 : * fsyncing, and might not have privileges to write at all.
52 : *
53 : * serverVersion indicates the version of the server to be fsync'd.
54 : *
55 : * Errors are reported but not considered fatal.
56 : */
57 : void
58 0 : fsync_pgdata(const char *pg_data,
59 : const char *progname,
60 : int serverVersion)
61 : {
62 : bool xlog_is_symlink;
63 : char pg_wal[MAXPGPATH];
64 : char pg_tblspc[MAXPGPATH];
65 :
66 : /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
67 0 : snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
68 : serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
69 0 : snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
70 :
71 : /*
72 : * If pg_wal is a symlink, we'll need to recurse into it separately,
73 : * because the first walkdir below will ignore it.
74 : */
75 0 : xlog_is_symlink = false;
76 :
77 : #ifndef WIN32
78 : {
79 : struct stat st;
80 :
81 0 : if (lstat(pg_wal, &st) < 0)
82 0 : fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
83 0 : progname, pg_wal, strerror(errno));
84 0 : else if (S_ISLNK(st.st_mode))
85 0 : xlog_is_symlink = true;
86 : }
87 : #else
88 : if (pgwin32_is_junction(pg_wal))
89 : xlog_is_symlink = true;
90 : #endif
91 :
92 : /*
93 : * If possible, hint to the kernel that we're soon going to fsync the data
94 : * directory and its contents.
95 : */
96 : #ifdef PG_FLUSH_DATA_WORKS
97 0 : walkdir(pg_data, pre_sync_fname, false, progname);
98 0 : if (xlog_is_symlink)
99 0 : walkdir(pg_wal, pre_sync_fname, false, progname);
100 0 : walkdir(pg_tblspc, pre_sync_fname, true, progname);
101 : #endif
102 :
103 : /*
104 : * Now we do the fsync()s in the same order.
105 : *
106 : * The main call ignores symlinks, so in addition to specially processing
107 : * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
108 : * process_symlinks = true. Note that if there are any plain directories
109 : * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
110 : * so we don't worry about optimizing it.
111 : */
112 0 : walkdir(pg_data, fsync_fname, false, progname);
113 0 : if (xlog_is_symlink)
114 0 : walkdir(pg_wal, fsync_fname, false, progname);
115 0 : walkdir(pg_tblspc, fsync_fname, true, progname);
116 0 : }
117 :
118 : /*
119 : * Issue fsync recursively on the given directory and all its contents.
120 : *
121 : * This is a convenient wrapper on top of walkdir().
122 : */
123 : void
124 0 : fsync_dir_recurse(const char *dir, const char *progname)
125 : {
126 : /*
127 : * If possible, hint to the kernel that we're soon going to fsync the data
128 : * directory and its contents.
129 : */
130 : #ifdef PG_FLUSH_DATA_WORKS
131 0 : walkdir(dir, pre_sync_fname, false, progname);
132 : #endif
133 :
134 0 : walkdir(dir, fsync_fname, false, progname);
135 0 : }
136 :
137 : /*
138 : * walkdir: recursively walk a directory, applying the action to each
139 : * regular file and directory (including the named directory itself).
140 : *
141 : * If process_symlinks is true, the action and recursion are also applied
142 : * to regular files and directories that are pointed to by symlinks in the
143 : * given directory; otherwise symlinks are ignored. Symlinks are always
144 : * ignored in subdirectories, ie we intentionally don't pass down the
145 : * process_symlinks flag to recursive calls.
146 : *
147 : * Errors are reported but not considered fatal.
148 : *
149 : * See also walkdir in fd.c, which is a backend version of this logic.
150 : */
151 : static void
152 0 : walkdir(const char *path,
153 : int (*action) (const char *fname, bool isdir, const char *progname),
154 : bool process_symlinks, const char *progname)
155 : {
156 : DIR *dir;
157 : struct dirent *de;
158 :
159 0 : dir = opendir(path);
160 0 : if (dir == NULL)
161 : {
162 0 : fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
163 0 : progname, path, strerror(errno));
164 0 : return;
165 : }
166 :
167 0 : while (errno = 0, (de = readdir(dir)) != NULL)
168 : {
169 : char subpath[MAXPGPATH * 2];
170 : struct stat fst;
171 : int sret;
172 :
173 0 : if (strcmp(de->d_name, ".") == 0 ||
174 0 : strcmp(de->d_name, "..") == 0)
175 0 : continue;
176 :
177 0 : snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
178 :
179 0 : if (process_symlinks)
180 0 : sret = stat(subpath, &fst);
181 : else
182 0 : sret = lstat(subpath, &fst);
183 :
184 0 : if (sret < 0)
185 : {
186 0 : fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
187 0 : progname, subpath, strerror(errno));
188 0 : continue;
189 : }
190 :
191 0 : if (S_ISREG(fst.st_mode))
192 0 : (*action) (subpath, false, progname);
193 0 : else if (S_ISDIR(fst.st_mode))
194 0 : walkdir(subpath, action, false, progname);
195 : }
196 :
197 0 : if (errno)
198 0 : fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
199 0 : progname, path, strerror(errno));
200 :
201 0 : (void) closedir(dir);
202 :
203 : /*
204 : * It's important to fsync the destination directory itself as individual
205 : * file fsyncs don't guarantee that the directory entry for the file is
206 : * synced. Recent versions of ext4 have made the window much wider but
207 : * it's been an issue for ext3 and other filesystems in the past.
208 : */
209 0 : (*action) (path, true, progname);
210 : }
211 :
/*
 * pre_sync_fname -- hint to the OS that this file is about to be fsync()'d.
 *
 * Failure to open the file is silently ignored when it is a permission
 * problem (EACCES), or EISDIR for something the caller says is a directory.
 * Any other open failure is reported on stderr and makes the function
 * return -1.  Otherwise 0 is returned; the outcome of the hint itself is
 * never checked.
 */
#ifdef PG_FLUSH_DATA_WORKS

static int
pre_sync_fname(const char *fname, bool isdir, const char *progname)
{
	int			fd = open(fname, O_RDONLY | PG_BINARY);

	if (fd < 0)
	{
		/* complain about anything except the failures we expect */
		if (errno != EACCES && !(isdir && errno == EISDIR))
		{
			fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
					progname, fname, strerror(errno));
			return -1;
		}
		return 0;
	}

	/*
	 * Same strategy as pg_flush_data() in the backend: sync_file_range() is
	 * preferred, posix_fadvise() is the fallback.  This is only a hint, so
	 * the result is discarded.
	 */
#if defined(HAVE_SYNC_FILE_RANGE)
	(void) sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
#elif defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
	(void) posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
#else
#error PG_FLUSH_DATA_WORKS should not have been defined
#endif

	(void) close(fd);

	return 0;
}

#endif							/* PG_FLUSH_DATA_WORKS */
254 :
255 : /*
256 : * fsync_fname -- Try to fsync a file or directory
257 : *
258 : * Ignores errors trying to open unreadable files, or trying to fsync
259 : * directories on systems where that isn't allowed/required. Reports
260 : * other errors non-fatally.
261 : */
262 : int
263 0 : fsync_fname(const char *fname, bool isdir, const char *progname)
264 : {
265 : int fd;
266 : int flags;
267 : int returncode;
268 :
269 : /*
270 : * Some OSs require directories to be opened read-only whereas other
271 : * systems don't allow us to fsync files opened read-only; so we need both
272 : * cases here. Using O_RDWR will cause us to fail to fsync files that are
273 : * not writable by our userid, but we assume that's OK.
274 : */
275 0 : flags = PG_BINARY;
276 0 : if (!isdir)
277 0 : flags |= O_RDWR;
278 : else
279 0 : flags |= O_RDONLY;
280 :
281 : /*
282 : * Open the file, silently ignoring errors about unreadable files (or
283 : * unsupported operations, e.g. opening a directory under Windows), and
284 : * logging others.
285 : */
286 0 : fd = open(fname, flags);
287 0 : if (fd < 0)
288 : {
289 0 : if (errno == EACCES || (isdir && errno == EISDIR))
290 0 : return 0;
291 0 : fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
292 0 : progname, fname, strerror(errno));
293 0 : return -1;
294 : }
295 :
296 0 : returncode = fsync(fd);
297 :
298 : /*
299 : * Some OSes don't allow us to fsync directories at all, so we can ignore
300 : * those errors. Anything else needs to be reported.
301 : */
302 0 : if (returncode != 0 && !(isdir && errno == EBADF))
303 : {
304 0 : fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
305 0 : progname, fname, strerror(errno));
306 0 : (void) close(fd);
307 0 : return -1;
308 : }
309 :
310 0 : (void) close(fd);
311 0 : return 0;
312 : }
313 :
314 : /*
315 : * fsync_parent_path -- fsync the parent path of a file or directory
316 : *
317 : * This is aimed at making file operations persistent on disk in case of
318 : * an OS crash or power failure.
319 : */
320 : int
321 0 : fsync_parent_path(const char *fname, const char *progname)
322 : {
323 : char parentpath[MAXPGPATH];
324 :
325 0 : strlcpy(parentpath, fname, MAXPGPATH);
326 0 : get_parent_directory(parentpath);
327 :
328 : /*
329 : * get_parent_directory() returns an empty string if the input argument is
330 : * just a file name (see comments in path.c), so handle that as being the
331 : * current directory.
332 : */
333 0 : if (strlen(parentpath) == 0)
334 0 : strlcpy(parentpath, ".", MAXPGPATH);
335 :
336 0 : if (fsync_fname(parentpath, true, progname) != 0)
337 0 : return -1;
338 :
339 0 : return 0;
340 : }
341 :
342 : /*
343 : * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
344 : *
345 : * Wrapper around rename, similar to the backend version.
346 : */
347 : int
348 0 : durable_rename(const char *oldfile, const char *newfile, const char *progname)
349 : {
350 : int fd;
351 :
352 : /*
353 : * First fsync the old and target path (if it exists), to ensure that they
354 : * are properly persistent on disk. Syncing the target file is not
355 : * strictly necessary, but it makes it easier to reason about crashes;
356 : * because it's then guaranteed that either source or target file exists
357 : * after a crash.
358 : */
359 0 : if (fsync_fname(oldfile, false, progname) != 0)
360 0 : return -1;
361 :
362 0 : fd = open(newfile, PG_BINARY | O_RDWR, 0);
363 0 : if (fd < 0)
364 : {
365 0 : if (errno != ENOENT)
366 : {
367 0 : fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
368 0 : progname, newfile, strerror(errno));
369 0 : return -1;
370 : }
371 : }
372 : else
373 : {
374 0 : if (fsync(fd) != 0)
375 : {
376 0 : fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
377 0 : progname, newfile, strerror(errno));
378 0 : close(fd);
379 0 : return -1;
380 : }
381 0 : close(fd);
382 : }
383 :
384 : /* Time to do the real deal... */
385 0 : if (rename(oldfile, newfile) != 0)
386 : {
387 0 : fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"),
388 0 : progname, oldfile, newfile, strerror(errno));
389 0 : return -1;
390 : }
391 :
392 : /*
393 : * To guarantee renaming the file is persistent, fsync the file with its
394 : * new name, and its containing directory.
395 : */
396 0 : if (fsync_fname(newfile, false, progname) != 0)
397 0 : return -1;
398 :
399 0 : if (fsync_parent_path(newfile, progname) != 0)
400 0 : return -1;
401 :
402 0 : return 0;
403 : }
|