Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gininsert.c
4 : * insert routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/gininsert.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xloginsert.h"
20 : #include "catalog/index.h"
21 : #include "miscadmin.h"
22 : #include "storage/bufmgr.h"
23 : #include "storage/smgr.h"
24 : #include "storage/indexfsm.h"
25 : #include "utils/memutils.h"
26 : #include "utils/rel.h"
27 :
28 :
29 : typedef struct
30 : {
31 : GinState ginstate;
32 : double indtuples;
33 : GinStatsData buildStats;
34 : MemoryContext tmpCtx;
35 : MemoryContext funcCtx;
36 : BuildAccumulator accum;
37 : } GinBuildState;
38 :
39 :
40 : /*
41 : * Adds array of item pointers to tuple's posting list, or
42 : * creates posting tree and tuple pointing to tree in case
43 : * of not enough space. Max size of tuple is defined in
44 : * GinFormTuple(). Returns a new, modified index tuple.
45 : * items[] must be in sorted order with no duplicates.
46 : */
47 : static IndexTuple
48 3665 : addItemPointersToLeafTuple(GinState *ginstate,
49 : IndexTuple old,
50 : ItemPointerData *items, uint32 nitem,
51 : GinStatsData *buildStats)
52 : {
53 : OffsetNumber attnum;
54 : Datum key;
55 : GinNullCategory category;
56 : IndexTuple res;
57 : ItemPointerData *newItems,
58 : *oldItems;
59 : int oldNPosting,
60 : newNPosting;
61 : GinPostingList *compressedList;
62 :
63 3665 : Assert(!GinIsPostingTree(old));
64 :
65 3665 : attnum = gintuple_get_attrnum(ginstate, old);
66 3665 : key = gintuple_get_key(ginstate, old, &category);
67 :
68 : /* merge the old and new posting lists */
69 3665 : oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
70 :
71 3665 : newItems = ginMergeItemPointers(items, nitem,
72 : oldItems, oldNPosting,
73 : &newNPosting);
74 :
75 : /* Compress the posting list, and try to a build tuple with room for it */
76 3665 : res = NULL;
77 3665 : compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize,
78 : NULL);
79 3665 : pfree(newItems);
80 3665 : if (compressedList)
81 : {
82 7330 : res = GinFormTuple(ginstate, attnum, key, category,
83 : (char *) compressedList,
84 3665 : SizeOfGinPostingList(compressedList),
85 : newNPosting,
86 : false);
87 3665 : pfree(compressedList);
88 : }
89 3665 : if (!res)
90 : {
91 : /* posting list would be too big, convert to posting tree */
92 : BlockNumber postingRoot;
93 :
94 : /*
95 : * Initialize posting tree with the old tuple's posting list. It's
96 : * surely small enough to fit on one posting-tree page, and should
97 : * already be in order with no duplicates.
98 : */
99 1 : postingRoot = createPostingTree(ginstate->index,
100 : oldItems,
101 : oldNPosting,
102 : buildStats);
103 :
104 : /* Now insert the TIDs-to-be-added into the posting tree */
105 1 : ginInsertItemPointers(ginstate->index, postingRoot,
106 : items, nitem,
107 : buildStats);
108 :
109 : /* And build a new posting-tree-only result tuple */
110 1 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
111 1 : GinSetPostingTree(res, postingRoot);
112 : }
113 3665 : pfree(oldItems);
114 :
115 3665 : return res;
116 : }
117 :
118 : /*
119 : * Build a fresh leaf tuple, either posting-list or posting-tree format
120 : * depending on whether the given items list will fit.
121 : * items[] must be in sorted order with no duplicates.
122 : *
123 : * This is basically the same logic as in addItemPointersToLeafTuple,
124 : * but working from slightly different input.
125 : */
126 : static IndexTuple
127 34485 : buildFreshLeafTuple(GinState *ginstate,
128 : OffsetNumber attnum, Datum key, GinNullCategory category,
129 : ItemPointerData *items, uint32 nitem,
130 : GinStatsData *buildStats)
131 : {
132 34485 : IndexTuple res = NULL;
133 : GinPostingList *compressedList;
134 :
135 : /* try to build a posting list tuple with all the items */
136 34485 : compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL);
137 34485 : if (compressedList)
138 : {
139 68970 : res = GinFormTuple(ginstate, attnum, key, category,
140 : (char *) compressedList,
141 34485 : SizeOfGinPostingList(compressedList),
142 : nitem, false);
143 34485 : pfree(compressedList);
144 : }
145 34485 : if (!res)
146 : {
147 : /* posting list would be too big, build posting tree */
148 : BlockNumber postingRoot;
149 :
150 : /*
151 : * Build posting-tree-only result tuple. We do this first so as to
152 : * fail quickly if the key is too big.
153 : */
154 3 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
155 :
156 : /*
157 : * Initialize a new posting tree with the TIDs.
158 : */
159 3 : postingRoot = createPostingTree(ginstate->index, items, nitem,
160 : buildStats);
161 :
162 : /* And save the root link in the result tuple */
163 3 : GinSetPostingTree(res, postingRoot);
164 : }
165 :
166 34485 : return res;
167 : }
168 :
169 : /*
170 : * Insert one or more heap TIDs associated with the given key value.
171 : * This will either add a single key entry, or enlarge a pre-existing entry.
172 : *
173 : * During an index build, buildStats is non-null and the counters
174 : * it contains should be incremented as needed.
175 : */
176 : void
177 41481 : ginEntryInsert(GinState *ginstate,
178 : OffsetNumber attnum, Datum key, GinNullCategory category,
179 : ItemPointerData *items, uint32 nitem,
180 : GinStatsData *buildStats)
181 : {
182 : GinBtreeData btree;
183 : GinBtreeEntryInsertData insertdata;
184 : GinBtreeStack *stack;
185 : IndexTuple itup;
186 : Page page;
187 :
188 41481 : insertdata.isDelete = FALSE;
189 :
190 : /* During index build, count the to-be-inserted entry */
191 41481 : if (buildStats)
192 14485 : buildStats->nEntries++;
193 :
194 41481 : ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
195 :
196 41481 : stack = ginFindLeafPage(&btree, false, NULL);
197 41481 : page = BufferGetPage(stack->buffer);
198 :
199 41481 : if (btree.findItem(&btree, stack))
200 : {
201 : /* found pre-existing entry */
202 6996 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
203 :
204 6996 : if (GinIsPostingTree(itup))
205 : {
206 : /* add entries to existing posting tree */
207 3331 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
208 :
209 : /* release all stack */
210 3331 : LockBuffer(stack->buffer, GIN_UNLOCK);
211 3331 : freeGinBtreeStack(stack);
212 :
213 : /* insert into posting tree */
214 3331 : ginInsertItemPointers(ginstate->index, rootPostingTree,
215 : items, nitem,
216 : buildStats);
217 3331 : return;
218 : }
219 :
220 : /* modify an existing leaf entry */
221 3665 : itup = addItemPointersToLeafTuple(ginstate, itup,
222 : items, nitem, buildStats);
223 :
224 3665 : insertdata.isDelete = TRUE;
225 : }
226 : else
227 : {
228 : /* no match, so construct a new leaf entry */
229 34485 : itup = buildFreshLeafTuple(ginstate, attnum, key, category,
230 : items, nitem, buildStats);
231 : }
232 :
233 : /* Insert the new or modified leaf tuple */
234 38150 : insertdata.entry = itup;
235 38150 : ginInsertValue(&btree, stack, &insertdata, buildStats);
236 38150 : pfree(itup);
237 : }
238 :
239 : /*
240 : * Extract index entries for a single indexable item, and add them to the
241 : * BuildAccumulator's state.
242 : *
243 : * This function is used only during initial index creation.
244 : */
245 : static void
246 14064 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
247 : Datum value, bool isNull,
248 : ItemPointer heapptr)
249 : {
250 : Datum *entries;
251 : GinNullCategory *categories;
252 : int32 nentries;
253 : MemoryContext oldCtx;
254 :
255 14064 : oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
256 14064 : entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
257 : value, isNull,
258 : &nentries, &categories);
259 14064 : MemoryContextSwitchTo(oldCtx);
260 :
261 14064 : ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
262 : entries, categories, nentries);
263 :
264 14064 : buildstate->indtuples += nentries;
265 :
266 14064 : MemoryContextReset(buildstate->funcCtx);
267 14064 : }
268 :
269 : static void
270 13961 : ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
271 : bool *isnull, bool tupleIsAlive, void *state)
272 : {
273 13961 : GinBuildState *buildstate = (GinBuildState *) state;
274 : MemoryContext oldCtx;
275 : int i;
276 :
277 13961 : oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
278 :
279 28025 : for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
280 42192 : ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
281 28128 : values[i], isnull[i],
282 : &htup->t_self);
283 :
284 : /* If we've maxed out our available memory, dump everything to the index */
285 13961 : if (buildstate->accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L)
286 : {
287 : ItemPointerData *list;
288 : Datum key;
289 : GinNullCategory category;
290 : uint32 nlist;
291 : OffsetNumber attnum;
292 :
293 0 : ginBeginBAScan(&buildstate->accum);
294 0 : while ((list = ginGetBAEntry(&buildstate->accum,
295 : &attnum, &key, &category, &nlist)) != NULL)
296 : {
297 : /* there could be many entries, so be willing to abort here */
298 0 : CHECK_FOR_INTERRUPTS();
299 0 : ginEntryInsert(&buildstate->ginstate, attnum, key, category,
300 : list, nlist, &buildstate->buildStats);
301 : }
302 :
303 0 : MemoryContextReset(buildstate->tmpCtx);
304 0 : ginInitBA(&buildstate->accum);
305 : }
306 :
307 13961 : MemoryContextSwitchTo(oldCtx);
308 13961 : }
309 :
310 : IndexBuildResult *
311 13 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
312 : {
313 : IndexBuildResult *result;
314 : double reltuples;
315 : GinBuildState buildstate;
316 : Buffer RootBuffer,
317 : MetaBuffer;
318 : ItemPointerData *list;
319 : Datum key;
320 : GinNullCategory category;
321 : uint32 nlist;
322 : MemoryContext oldCtx;
323 : OffsetNumber attnum;
324 :
325 13 : if (RelationGetNumberOfBlocks(index) != 0)
326 0 : elog(ERROR, "index \"%s\" already contains data",
327 : RelationGetRelationName(index));
328 :
329 13 : initGinState(&buildstate.ginstate, index);
330 13 : buildstate.indtuples = 0;
331 13 : memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
332 :
333 : /* initialize the meta page */
334 13 : MetaBuffer = GinNewBuffer(index);
335 :
336 : /* initialize the root page */
337 13 : RootBuffer = GinNewBuffer(index);
338 :
339 13 : START_CRIT_SECTION();
340 13 : GinInitMetabuffer(MetaBuffer);
341 13 : MarkBufferDirty(MetaBuffer);
342 13 : GinInitBuffer(RootBuffer, GIN_LEAF);
343 13 : MarkBufferDirty(RootBuffer);
344 :
345 13 : if (RelationNeedsWAL(index))
346 : {
347 : XLogRecPtr recptr;
348 : Page page;
349 :
350 10 : XLogBeginInsert();
351 10 : XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
352 10 : XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
353 :
354 10 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
355 :
356 10 : page = BufferGetPage(RootBuffer);
357 10 : PageSetLSN(page, recptr);
358 :
359 10 : page = BufferGetPage(MetaBuffer);
360 10 : PageSetLSN(page, recptr);
361 : }
362 :
363 13 : UnlockReleaseBuffer(MetaBuffer);
364 13 : UnlockReleaseBuffer(RootBuffer);
365 13 : END_CRIT_SECTION();
366 :
367 : /* count the root as first entry page */
368 13 : buildstate.buildStats.nEntryPages++;
369 :
370 : /*
371 : * create a temporary memory context that is used to hold data not yet
372 : * dumped out to the index
373 : */
374 13 : buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
375 : "Gin build temporary context",
376 : ALLOCSET_DEFAULT_SIZES);
377 :
378 : /*
379 : * create a temporary memory context that is used for calling
380 : * ginExtractEntries(), and can be reset after each tuple
381 : */
382 13 : buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
383 : "Gin build temporary context for user-defined function",
384 : ALLOCSET_DEFAULT_SIZES);
385 :
386 13 : buildstate.accum.ginstate = &buildstate.ginstate;
387 13 : ginInitBA(&buildstate.accum);
388 :
389 : /*
390 : * Do the heap scan. We disallow sync scan here because dataPlaceToPage
391 : * prefers to receive tuples in TID order.
392 : */
393 13 : reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
394 : ginBuildCallback, (void *) &buildstate);
395 :
396 : /* dump remaining entries to the index */
397 13 : oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
398 13 : ginBeginBAScan(&buildstate.accum);
399 14511 : while ((list = ginGetBAEntry(&buildstate.accum,
400 : &attnum, &key, &category, &nlist)) != NULL)
401 : {
402 : /* there could be many entries, so be willing to abort here */
403 14485 : CHECK_FOR_INTERRUPTS();
404 14485 : ginEntryInsert(&buildstate.ginstate, attnum, key, category,
405 : list, nlist, &buildstate.buildStats);
406 : }
407 13 : MemoryContextSwitchTo(oldCtx);
408 :
409 13 : MemoryContextDelete(buildstate.funcCtx);
410 13 : MemoryContextDelete(buildstate.tmpCtx);
411 :
412 : /*
413 : * Update metapage stats
414 : */
415 13 : buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
416 13 : ginUpdateStats(index, &buildstate.buildStats);
417 :
418 : /*
419 : * Return statistics
420 : */
421 13 : result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
422 :
423 13 : result->heap_tuples = reltuples;
424 13 : result->index_tuples = buildstate.indtuples;
425 :
426 13 : return result;
427 : }
428 :
429 : /*
430 : * ginbuildempty() -- build an empty gin index in the initialization fork
431 : */
432 : void
433 0 : ginbuildempty(Relation index)
434 : {
435 : Buffer RootBuffer,
436 : MetaBuffer;
437 :
438 : /* An empty GIN index has two pages. */
439 0 : MetaBuffer =
440 : ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
441 0 : LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
442 0 : RootBuffer =
443 : ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
444 0 : LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
445 :
446 : /* Initialize and xlog metabuffer and root buffer. */
447 0 : START_CRIT_SECTION();
448 0 : GinInitMetabuffer(MetaBuffer);
449 0 : MarkBufferDirty(MetaBuffer);
450 0 : log_newpage_buffer(MetaBuffer, false);
451 0 : GinInitBuffer(RootBuffer, GIN_LEAF);
452 0 : MarkBufferDirty(RootBuffer);
453 0 : log_newpage_buffer(RootBuffer, false);
454 0 : END_CRIT_SECTION();
455 :
456 : /* Unlock and release the buffers. */
457 0 : UnlockReleaseBuffer(MetaBuffer);
458 0 : UnlockReleaseBuffer(RootBuffer);
459 0 : }
460 :
461 : /*
462 : * Insert index entries for a single indexable item during "normal"
463 : * (non-fast-update) insertion
464 : */
465 : static void
466 2000 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
467 : Datum value, bool isNull,
468 : ItemPointer item)
469 : {
470 : Datum *entries;
471 : GinNullCategory *categories;
472 : int32 i,
473 : nentries;
474 :
475 2000 : entries = ginExtractEntries(ginstate, attnum, value, isNull,
476 : &nentries, &categories);
477 :
478 7996 : for (i = 0; i < nentries; i++)
479 5996 : ginEntryInsert(ginstate, attnum, entries[i], categories[i],
480 : item, 1, NULL);
481 2000 : }
482 :
483 : bool
484 24006 : gininsert(Relation index, Datum *values, bool *isnull,
485 : ItemPointer ht_ctid, Relation heapRel,
486 : IndexUniqueCheck checkUnique,
487 : IndexInfo *indexInfo)
488 : {
489 24006 : GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
490 : MemoryContext oldCtx;
491 : MemoryContext insertCtx;
492 : int i;
493 :
494 : /* Initialize GinState cache if first call in this statement */
495 24006 : if (ginstate == NULL)
496 : {
497 11 : oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
498 11 : ginstate = (GinState *) palloc(sizeof(GinState));
499 11 : initGinState(ginstate, index);
500 11 : indexInfo->ii_AmCache = (void *) ginstate;
501 11 : MemoryContextSwitchTo(oldCtx);
502 : }
503 :
504 24006 : insertCtx = AllocSetContextCreate(CurrentMemoryContext,
505 : "Gin insert temporary context",
506 : ALLOCSET_DEFAULT_SIZES);
507 :
508 24006 : oldCtx = MemoryContextSwitchTo(insertCtx);
509 :
510 24006 : if (GinGetUseFastUpdate(index))
511 22006 : {
512 : GinTupleCollector collector;
513 :
514 22006 : memset(&collector, 0, sizeof(GinTupleCollector));
515 :
516 44012 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
517 66018 : ginHeapTupleFastCollect(ginstate, &collector,
518 22006 : (OffsetNumber) (i + 1),
519 44012 : values[i], isnull[i],
520 : ht_ctid);
521 :
522 22006 : ginHeapTupleFastInsert(ginstate, &collector);
523 : }
524 : else
525 : {
526 4000 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
527 4000 : ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
528 4000 : values[i], isnull[i],
529 : ht_ctid);
530 : }
531 :
532 24006 : MemoryContextSwitchTo(oldCtx);
533 24006 : MemoryContextDelete(insertCtx);
534 :
535 24006 : return false;
536 : }
|