Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * reinit.c
4 : * Reinitialization of unlogged relations
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/storage/file/reinit.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include <unistd.h>
18 :
19 : #include "catalog/catalog.h"
20 : #include "common/relpath.h"
21 : #include "storage/copydir.h"
22 : #include "storage/fd.h"
23 : #include "storage/reinit.h"
24 : #include "utils/hsearch.h"
25 : #include "utils/memutils.h"
26 :
27 : static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname,
28 : int op);
29 : static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
30 : int op);
31 : static bool parse_filename_for_nontemp_relation(const char *name,
32 : int *oidchars, ForkNumber *fork);
33 :
34 : typedef struct
35 : {
36 : char oid[OIDCHARS + 1];
37 : } unlogged_relation_entry;
38 :
39 : /*
40 : * Reset unlogged relations from before the last restart.
41 : *
42 : * If op includes UNLOGGED_RELATION_CLEANUP, we remove all forks of any
43 : * relation with an "init" fork, except for the "init" fork itself.
44 : *
45 : * If op includes UNLOGGED_RELATION_INIT, we copy the "init" fork to the main
46 : * fork.
47 : */
48 : void
49 0 : ResetUnloggedRelations(int op)
50 : {
51 : char temp_path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
52 : DIR *spc_dir;
53 : struct dirent *spc_de;
54 : MemoryContext tmpctx,
55 : oldctx;
56 :
57 : /* Log it. */
58 0 : elog(DEBUG1, "resetting unlogged relations: cleanup %d init %d",
59 : (op & UNLOGGED_RELATION_CLEANUP) != 0,
60 : (op & UNLOGGED_RELATION_INIT) != 0);
61 :
62 : /*
63 : * Just to be sure we don't leak any memory, let's create a temporary
64 : * memory context for this operation.
65 : */
66 0 : tmpctx = AllocSetContextCreate(CurrentMemoryContext,
67 : "ResetUnloggedRelations",
68 : ALLOCSET_DEFAULT_SIZES);
69 0 : oldctx = MemoryContextSwitchTo(tmpctx);
70 :
71 : /*
72 : * First process unlogged files in pg_default ($PGDATA/base)
73 : */
74 0 : ResetUnloggedRelationsInTablespaceDir("base", op);
75 :
76 : /*
77 : * Cycle through directories for all non-default tablespaces.
78 : */
79 0 : spc_dir = AllocateDir("pg_tblspc");
80 :
81 0 : while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL)
82 : {
83 0 : if (strcmp(spc_de->d_name, ".") == 0 ||
84 0 : strcmp(spc_de->d_name, "..") == 0)
85 0 : continue;
86 :
87 0 : snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
88 0 : spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
89 0 : ResetUnloggedRelationsInTablespaceDir(temp_path, op);
90 : }
91 :
92 0 : FreeDir(spc_dir);
93 :
94 : /*
95 : * Restore memory context.
96 : */
97 0 : MemoryContextSwitchTo(oldctx);
98 0 : MemoryContextDelete(tmpctx);
99 0 : }
100 :
101 : /* Process one tablespace directory for ResetUnloggedRelations */
102 : static void
103 0 : ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
104 : {
105 : DIR *ts_dir;
106 : struct dirent *de;
107 : char dbspace_path[MAXPGPATH * 2];
108 :
109 0 : ts_dir = AllocateDir(tsdirname);
110 0 : if (ts_dir == NULL)
111 : {
112 : /* anything except ENOENT is fishy */
113 0 : if (errno != ENOENT)
114 0 : elog(LOG,
115 : "could not open tablespace directory \"%s\": %m",
116 : tsdirname);
117 0 : return;
118 : }
119 :
120 0 : while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
121 : {
122 0 : int i = 0;
123 :
124 : /*
125 : * We're only interested in the per-database directories, which have
126 : * numeric names. Note that this code will also (properly) ignore "."
127 : * and "..".
128 : */
129 0 : while (isdigit((unsigned char) de->d_name[i]))
130 0 : ++i;
131 0 : if (de->d_name[i] != '\0' || i == 0)
132 0 : continue;
133 :
134 0 : snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
135 0 : tsdirname, de->d_name);
136 0 : ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
137 : }
138 :
139 0 : FreeDir(ts_dir);
140 : }
141 :
142 : /* Process one per-dbspace directory for ResetUnloggedRelations */
143 : static void
144 0 : ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
145 : {
146 : DIR *dbspace_dir;
147 : struct dirent *de;
148 : char rm_path[MAXPGPATH * 2];
149 :
150 : /* Caller must specify at least one operation. */
151 0 : Assert((op & (UNLOGGED_RELATION_CLEANUP | UNLOGGED_RELATION_INIT)) != 0);
152 :
153 : /*
154 : * Cleanup is a two-pass operation. First, we go through and identify all
155 : * the files with init forks. Then, we go through again and nuke
156 : * everything with the same OID except the init fork.
157 : */
158 0 : if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
159 : {
160 0 : HTAB *hash = NULL;
161 : HASHCTL ctl;
162 :
163 : /* Open the directory. */
164 0 : dbspace_dir = AllocateDir(dbspacedirname);
165 0 : if (dbspace_dir == NULL)
166 : {
167 0 : elog(LOG,
168 : "could not open dbspace directory \"%s\": %m",
169 : dbspacedirname);
170 0 : return;
171 : }
172 :
173 : /*
174 : * It's possible that someone could create a ton of unlogged relations
175 : * in the same database & tablespace, so we'd better use a hash table
176 : * rather than an array or linked list to keep track of which files
177 : * need to be reset. Otherwise, this cleanup operation would be
178 : * O(n^2).
179 : */
180 0 : ctl.keysize = sizeof(unlogged_relation_entry);
181 0 : ctl.entrysize = sizeof(unlogged_relation_entry);
182 0 : hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
183 :
184 : /* Scan the directory. */
185 0 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
186 : {
187 : ForkNumber forkNum;
188 : int oidchars;
189 : unlogged_relation_entry ent;
190 :
191 : /* Skip anything that doesn't look like a relation data file. */
192 0 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
193 : &forkNum))
194 0 : continue;
195 :
196 : /* Also skip it unless this is the init fork. */
197 0 : if (forkNum != INIT_FORKNUM)
198 0 : continue;
199 :
200 : /*
201 : * Put the OID portion of the name into the hash table, if it
202 : * isn't already.
203 : */
204 0 : memset(ent.oid, 0, sizeof(ent.oid));
205 0 : memcpy(ent.oid, de->d_name, oidchars);
206 0 : hash_search(hash, &ent, HASH_ENTER, NULL);
207 : }
208 :
209 : /* Done with the first pass. */
210 0 : FreeDir(dbspace_dir);
211 :
212 : /*
213 : * If we didn't find any init forks, there's no point in continuing;
214 : * we can bail out now.
215 : */
216 0 : if (hash_get_num_entries(hash) == 0)
217 : {
218 0 : hash_destroy(hash);
219 0 : return;
220 : }
221 :
222 : /*
223 : * Now, make a second pass and remove anything that matches. First,
224 : * reopen the directory.
225 : */
226 0 : dbspace_dir = AllocateDir(dbspacedirname);
227 0 : if (dbspace_dir == NULL)
228 : {
229 0 : elog(LOG,
230 : "could not open dbspace directory \"%s\": %m",
231 : dbspacedirname);
232 0 : hash_destroy(hash);
233 0 : return;
234 : }
235 :
236 : /* Scan the directory. */
237 0 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
238 : {
239 : ForkNumber forkNum;
240 : int oidchars;
241 : bool found;
242 : unlogged_relation_entry ent;
243 :
244 : /* Skip anything that doesn't look like a relation data file. */
245 0 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
246 : &forkNum))
247 0 : continue;
248 :
249 : /* We never remove the init fork. */
250 0 : if (forkNum == INIT_FORKNUM)
251 0 : continue;
252 :
253 : /*
254 : * See whether the OID portion of the name shows up in the hash
255 : * table.
256 : */
257 0 : memset(ent.oid, 0, sizeof(ent.oid));
258 0 : memcpy(ent.oid, de->d_name, oidchars);
259 0 : hash_search(hash, &ent, HASH_FIND, &found);
260 :
261 : /* If so, nuke it! */
262 0 : if (found)
263 : {
264 0 : snprintf(rm_path, sizeof(rm_path), "%s/%s",
265 0 : dbspacedirname, de->d_name);
266 :
267 : /*
268 : * It's tempting to actually throw an error here, but since
269 : * this code gets run during database startup, that could
270 : * result in the database failing to start. (XXX Should we do
271 : * it anyway?)
272 : */
273 0 : if (unlink(rm_path))
274 0 : elog(LOG, "could not unlink file \"%s\": %m", rm_path);
275 : else
276 0 : elog(DEBUG2, "unlinked file \"%s\"", rm_path);
277 : }
278 : }
279 :
280 : /* Cleanup is complete. */
281 0 : FreeDir(dbspace_dir);
282 0 : hash_destroy(hash);
283 : }
284 :
285 : /*
286 : * Initialization happens after cleanup is complete: we copy each init
287 : * fork file to the corresponding main fork file. Note that if we are
288 : * asked to do both cleanup and init, we may never get here: if the
289 : * cleanup code determines that there are no init forks in this dbspace,
290 : * it will return before we get to this point.
291 : */
292 0 : if ((op & UNLOGGED_RELATION_INIT) != 0)
293 : {
294 : /* Open the directory. */
295 0 : dbspace_dir = AllocateDir(dbspacedirname);
296 0 : if (dbspace_dir == NULL)
297 : {
298 : /* we just saw this directory, so it really ought to be there */
299 0 : elog(LOG,
300 : "could not open dbspace directory \"%s\": %m",
301 : dbspacedirname);
302 0 : return;
303 : }
304 :
305 : /* Scan the directory. */
306 0 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
307 : {
308 : ForkNumber forkNum;
309 : int oidchars;
310 : char oidbuf[OIDCHARS + 1];
311 : char srcpath[MAXPGPATH * 2];
312 : char dstpath[MAXPGPATH];
313 :
314 : /* Skip anything that doesn't look like a relation data file. */
315 0 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
316 : &forkNum))
317 0 : continue;
318 :
319 : /* Also skip it unless this is the init fork. */
320 0 : if (forkNum != INIT_FORKNUM)
321 0 : continue;
322 :
323 : /* Construct source pathname. */
324 0 : snprintf(srcpath, sizeof(srcpath), "%s/%s",
325 0 : dbspacedirname, de->d_name);
326 :
327 : /* Construct destination pathname. */
328 0 : memcpy(oidbuf, de->d_name, oidchars);
329 0 : oidbuf[oidchars] = '\0';
330 0 : snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
331 0 : dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
332 0 : strlen(forkNames[INIT_FORKNUM]));
333 :
334 : /* OK, we're ready to perform the actual copy. */
335 0 : elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
336 0 : copy_file(srcpath, dstpath);
337 : }
338 :
339 0 : FreeDir(dbspace_dir);
340 :
341 : /*
342 : * copy_file() above has already called pg_flush_data() on the files
343 : * it created. Now we need to fsync those files, because a checkpoint
344 : * won't do it for us while we're in recovery. We do this in a
345 : * separate pass to allow the kernel to perform all the flushes
346 : * (especially the metadata ones) at once.
347 : */
348 0 : dbspace_dir = AllocateDir(dbspacedirname);
349 0 : if (dbspace_dir == NULL)
350 : {
351 : /* we just saw this directory, so it really ought to be there */
352 0 : elog(LOG,
353 : "could not open dbspace directory \"%s\": %m",
354 : dbspacedirname);
355 0 : return;
356 : }
357 :
358 0 : while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
359 : {
360 : ForkNumber forkNum;
361 : int oidchars;
362 : char oidbuf[OIDCHARS + 1];
363 : char mainpath[MAXPGPATH];
364 :
365 : /* Skip anything that doesn't look like a relation data file. */
366 0 : if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars,
367 : &forkNum))
368 0 : continue;
369 :
370 : /* Also skip it unless this is the init fork. */
371 0 : if (forkNum != INIT_FORKNUM)
372 0 : continue;
373 :
374 : /* Construct main fork pathname. */
375 0 : memcpy(oidbuf, de->d_name, oidchars);
376 0 : oidbuf[oidchars] = '\0';
377 0 : snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
378 0 : dbspacedirname, oidbuf, de->d_name + oidchars + 1 +
379 0 : strlen(forkNames[INIT_FORKNUM]));
380 :
381 0 : fsync_fname(mainpath, false);
382 : }
383 :
384 0 : FreeDir(dbspace_dir);
385 :
386 0 : fsync_fname(dbspacedirname, true);
387 : }
388 : }
389 :
390 : /*
391 : * Basic parsing of putative relation filenames.
392 : *
393 : * This function returns true if the file appears to be in the correct format
394 : * for a non-temporary relation and false otherwise.
395 : *
396 : * NB: If this function returns true, the caller is entitled to assume that
397 : * *oidchars has been set to the a value no more than OIDCHARS, and thus
398 : * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID
399 : * portion of the filename. This is critical to protect against a possible
400 : * buffer overrun.
401 : */
402 : static bool
403 0 : parse_filename_for_nontemp_relation(const char *name, int *oidchars,
404 : ForkNumber *fork)
405 : {
406 : int pos;
407 :
408 : /* Look for a non-empty string of digits (that isn't too long). */
409 0 : for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
410 : ;
411 0 : if (pos == 0 || pos > OIDCHARS)
412 0 : return false;
413 0 : *oidchars = pos;
414 :
415 : /* Check for a fork name. */
416 0 : if (name[pos] != '_')
417 0 : *fork = MAIN_FORKNUM;
418 : else
419 : {
420 : int forkchar;
421 :
422 0 : forkchar = forkname_chars(&name[pos + 1], fork);
423 0 : if (forkchar <= 0)
424 0 : return false;
425 0 : pos += forkchar + 1;
426 : }
427 :
428 : /* Check for a segment number. */
429 0 : if (name[pos] == '.')
430 : {
431 : int segchar;
432 :
433 0 : for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
434 : ;
435 0 : if (segchar <= 1)
436 0 : return false;
437 0 : pos += segchar;
438 : }
439 :
440 : /* Now we should be at the end. */
441 0 : if (name[pos] != '\0')
442 0 : return false;
443 0 : return true;
444 : }
|