Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * genam.c
4 : * general index access method routines
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/index/genam.c
12 : *
13 : * NOTES
14 : * many of the old access method routines have been turned into
15 : * macros and moved to genam.h -cim 4/30/91
16 : *
17 : *-------------------------------------------------------------------------
18 : */
19 :
20 : #include "postgres.h"
21 :
22 : #include "access/relscan.h"
23 : #include "access/transam.h"
24 : #include "catalog/index.h"
25 : #include "lib/stringinfo.h"
26 : #include "miscadmin.h"
27 : #include "storage/bufmgr.h"
28 : #include "utils/acl.h"
29 : #include "utils/builtins.h"
30 : #include "utils/lsyscache.h"
31 : #include "utils/rel.h"
32 : #include "utils/rls.h"
33 : #include "utils/ruleutils.h"
34 : #include "utils/snapmgr.h"
35 : #include "utils/syscache.h"
36 : #include "utils/tqual.h"
37 :
38 :
39 : /* ----------------------------------------------------------------
40 : * general access method routines
41 : *
42 : * All indexed access methods use an identical scan structure.
43 : * We don't know how the various AMs do locking, however, so we don't
44 : * do anything about that here.
45 : *
46 : * The intent is that an AM implementor will define a beginscan routine
47 : * that calls RelationGetIndexScan, to fill in the scan, and then does
48 : * whatever kind of locking he wants.
49 : *
50 : * At the end of a scan, the AM's endscan routine undoes the locking,
51 : * but does *not* call IndexScanEnd --- the higher-level index_endscan
52 : * routine does that. (We can't do it in the AM because index_endscan
53 : * still needs to touch the IndexScanDesc after calling the AM.)
54 : *
55 : * Because of this, the AM does not have a choice whether to call
56 : * RelationGetIndexScan or not; its beginscan routine must return an
57 : * object made by RelationGetIndexScan. This is kinda ugly but not
58 : * worth cleaning up now.
59 : * ----------------------------------------------------------------
60 : */
61 :
62 : /* ----------------
63 : * RelationGetIndexScan -- Create and fill an IndexScanDesc.
64 : *
65 : * This routine creates an index scan structure and sets up initial
66 : * contents for it.
67 : *
68 : * Parameters:
69 : * indexRelation -- index relation for scan.
70 : * nkeys -- count of scan keys (index qual conditions).
71 : * norderbys -- count of index order-by operators.
72 : *
73 : * Returns:
74 : * An initialized IndexScanDesc.
75 : * ----------------
76 : */
77 : IndexScanDesc
78 362856 : RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
79 : {
80 : IndexScanDesc scan;
81 :
82 362856 : scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
83 :
84 362856 : scan->heapRelation = NULL; /* may be set later */
85 362856 : scan->indexRelation = indexRelation;
86 362856 : scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
87 362856 : scan->numberOfKeys = nkeys;
88 362856 : scan->numberOfOrderBys = norderbys;
89 :
90 : /*
91 : * We allocate key workspace here, but it won't get filled until amrescan.
92 : */
93 362856 : if (nkeys > 0)
94 362493 : scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
95 : else
96 363 : scan->keyData = NULL;
97 362856 : if (norderbys > 0)
98 8 : scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
99 : else
100 362848 : scan->orderByData = NULL;
101 :
102 362856 : scan->xs_want_itup = false; /* may be set later */
103 :
104 : /*
105 : * During recovery we ignore killed tuples and don't bother to kill them
106 : * either. We do this because the xmin on the primary node could easily be
107 : * later than the xmin on the standby node, so that what the primary
108 : * thinks is killed is supposed to be visible on standby. So for correct
109 : * MVCC for queries during recovery we must ignore these hints and check
110 : * all tuples. Do *not* set ignore_killed_tuples to true when running in a
111 : * transaction that was started during recovery. xactStartedInRecovery
112 : * should not be altered by index AMs.
113 : */
114 362856 : scan->kill_prior_tuple = false;
115 362856 : scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
116 362856 : scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
117 :
118 362856 : scan->opaque = NULL;
119 :
120 362856 : scan->xs_itup = NULL;
121 362856 : scan->xs_itupdesc = NULL;
122 362856 : scan->xs_hitup = NULL;
123 362856 : scan->xs_hitupdesc = NULL;
124 :
125 362856 : ItemPointerSetInvalid(&scan->xs_ctup.t_self);
126 362856 : scan->xs_ctup.t_data = NULL;
127 362856 : scan->xs_cbuf = InvalidBuffer;
128 362856 : scan->xs_continue_hot = false;
129 :
130 362856 : return scan;
131 : }
132 :
133 : /* ----------------
134 : * IndexScanEnd -- End an index scan.
135 : *
136 : * This routine just releases the storage acquired by
137 : * RelationGetIndexScan(). Any AM-level resources are
138 : * assumed to already have been released by the AM's
139 : * endscan routine.
140 : *
141 : * Returns:
142 : * None.
143 : * ----------------
144 : */
145 : void
146 362748 : IndexScanEnd(IndexScanDesc scan)
147 : {
148 362748 : if (scan->keyData != NULL)
149 362390 : pfree(scan->keyData);
150 362748 : if (scan->orderByData != NULL)
151 8 : pfree(scan->orderByData);
152 :
153 362748 : pfree(scan);
154 362748 : }
155 :
156 : /*
157 : * BuildIndexValueDescription
158 : *
159 : * Construct a string describing the contents of an index entry, in the
160 : * form "(key_name, ...)=(key_value, ...)". This is currently used
161 : * for building unique-constraint and exclusion-constraint error messages.
162 : *
163 : * Note that if the user does not have permissions to view all of the
164 : * columns involved then a NULL is returned. Returning a partial key seems
165 : * unlikely to be useful and we have no way to know which of the columns the
166 : * user provided (unlike in ExecBuildSlotValueDescription).
167 : *
168 : * The passed-in values/nulls arrays are the "raw" input to the index AM,
169 : * e.g. results of FormIndexDatum --- this is not necessarily what is stored
170 : * in the index, but it's what the user perceives to be stored.
171 : *
172 : * Note: if you change anything here, check whether
173 : * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
174 : * change.
175 : */
176 : char *
177 58 : BuildIndexValueDescription(Relation indexRelation,
178 : Datum *values, bool *isnull)
179 : {
180 : StringInfoData buf;
181 : Form_pg_index idxrec;
182 : HeapTuple ht_idx;
183 58 : int natts = indexRelation->rd_rel->relnatts;
184 : int i;
185 : int keyno;
186 58 : Oid indexrelid = RelationGetRelid(indexRelation);
187 : Oid indrelid;
188 : AclResult aclresult;
189 :
190 : /*
191 : * Check permissions- if the user does not have access to view all of the
192 : * key columns then return NULL to avoid leaking data.
193 : *
194 : * First check if RLS is enabled for the relation. If so, return NULL to
195 : * avoid leaking data.
196 : *
197 : * Next we need to check table-level SELECT access and then, if there is
198 : * no access there, check column-level permissions.
199 : */
200 :
201 : /*
202 : * Fetch the pg_index tuple by the Oid of the index
203 : */
204 58 : ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexrelid));
205 58 : if (!HeapTupleIsValid(ht_idx))
206 0 : elog(ERROR, "cache lookup failed for index %u", indexrelid);
207 58 : idxrec = (Form_pg_index) GETSTRUCT(ht_idx);
208 :
209 58 : indrelid = idxrec->indrelid;
210 58 : Assert(indexrelid == idxrec->indexrelid);
211 :
212 : /* RLS check- if RLS is enabled then we don't return anything. */
213 58 : if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
214 : {
215 1 : ReleaseSysCache(ht_idx);
216 1 : return NULL;
217 : }
218 :
219 : /* Table-level SELECT is enough, if the user has it */
220 57 : aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
221 57 : if (aclresult != ACLCHECK_OK)
222 : {
223 : /*
224 : * No table-level access, so step through the columns in the index and
225 : * make sure the user has SELECT rights on all of them.
226 : */
227 4 : for (keyno = 0; keyno < idxrec->indnatts; keyno++)
228 : {
229 4 : AttrNumber attnum = idxrec->indkey.values[keyno];
230 :
231 : /*
232 : * Note that if attnum == InvalidAttrNumber, then this is an index
233 : * based on an expression and we return no detail rather than try
234 : * to figure out what column(s) the expression includes and if the
235 : * user has SELECT rights on them.
236 : */
237 8 : if (attnum == InvalidAttrNumber ||
238 4 : pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
239 : ACL_SELECT) != ACLCHECK_OK)
240 : {
241 : /* No access, so clean up and return */
242 2 : ReleaseSysCache(ht_idx);
243 2 : return NULL;
244 : }
245 : }
246 : }
247 55 : ReleaseSysCache(ht_idx);
248 :
249 55 : initStringInfo(&buf);
250 55 : appendStringInfo(&buf, "(%s)=(",
251 : pg_get_indexdef_columns(indexrelid, true));
252 :
253 121 : for (i = 0; i < natts; i++)
254 : {
255 : char *val;
256 :
257 66 : if (isnull[i])
258 0 : val = "null";
259 : else
260 : {
261 : Oid foutoid;
262 : bool typisvarlena;
263 :
264 : /*
265 : * The provided data is not necessarily of the type stored in the
266 : * index; rather it is of the index opclass's input type. So look
267 : * at rd_opcintype not the index tupdesc.
268 : *
269 : * Note: this is a bit shaky for opclasses that have pseudotype
270 : * input types such as ANYARRAY or RECORD. Currently, the
271 : * typoutput functions associated with the pseudotypes will work
272 : * okay, but we might have to try harder in future.
273 : */
274 66 : getTypeOutputInfo(indexRelation->rd_opcintype[i],
275 : &foutoid, &typisvarlena);
276 66 : val = OidOutputFunctionCall(foutoid, values[i]);
277 : }
278 :
279 66 : if (i > 0)
280 11 : appendStringInfoString(&buf, ", ");
281 66 : appendStringInfoString(&buf, val);
282 : }
283 :
284 55 : appendStringInfoChar(&buf, ')');
285 :
286 55 : return buf.data;
287 : }
288 :
289 :
290 : /* ----------------------------------------------------------------
291 : * heap-or-index-scan access to system catalogs
292 : *
293 : * These functions support system catalog accesses that normally use
294 : * an index but need to be capable of being switched to heap scans
295 : * if the system indexes are unavailable.
296 : *
297 : * The specified scan keys must be compatible with the named index.
298 : * Generally this means that they must constrain either all columns
299 : * of the index, or the first K columns of an N-column index.
300 : *
301 : * These routines could work with non-system tables, actually,
302 : * but they're only useful when there is a known index to use with
303 : * the given scan keys; so in practice they're only good for
304 : * predetermined types of scans of system catalogs.
305 : * ----------------------------------------------------------------
306 : */
307 :
308 : /*
309 : * systable_beginscan --- set up for heap-or-index scan
310 : *
311 : * rel: catalog to scan, already opened and suitably locked
312 : * indexId: OID of index to conditionally use
313 : * indexOK: if false, forces a heap scan (see notes below)
314 : * snapshot: time qual to use (NULL for a recent catalog snapshot)
315 : * nkeys, key: scan keys
316 : *
317 : * The attribute numbers in the scan key should be set for the heap case.
318 : * If we choose to index, we reset them to 1..n to reference the index
319 : * columns. Note this means there must be one scankey qualification per
320 : * index column! This is checked by the Asserts in the normal, index-using
321 : * case, but won't be checked if the heapscan path is taken.
322 : *
323 : * The routine checks the normal cases for whether an indexscan is safe,
324 : * but caller can make additional checks and pass indexOK=false if needed.
325 : * In standard case indexOK can simply be constant TRUE.
326 : */
327 : SysScanDesc
328 355851 : systable_beginscan(Relation heapRelation,
329 : Oid indexId,
330 : bool indexOK,
331 : Snapshot snapshot,
332 : int nkeys, ScanKey key)
333 : {
334 : SysScanDesc sysscan;
335 : Relation irel;
336 :
337 710019 : if (indexOK &&
338 707664 : !IgnoreSystemIndexes &&
339 353496 : !ReindexIsProcessingIndex(indexId))
340 353490 : irel = index_open(indexId, AccessShareLock);
341 : else
342 2361 : irel = NULL;
343 :
344 355851 : sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
345 :
346 355851 : sysscan->heap_rel = heapRelation;
347 355851 : sysscan->irel = irel;
348 :
349 355851 : if (snapshot == NULL)
350 : {
351 313512 : Oid relid = RelationGetRelid(heapRelation);
352 :
353 313512 : snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
354 313512 : sysscan->snapshot = snapshot;
355 : }
356 : else
357 : {
358 : /* Caller is responsible for any snapshot. */
359 42339 : sysscan->snapshot = NULL;
360 : }
361 :
362 355851 : if (irel)
363 : {
364 : int i;
365 :
366 : /* Change attribute numbers to be index column numbers. */
367 975987 : for (i = 0; i < nkeys; i++)
368 : {
369 : int j;
370 :
371 935973 : for (j = 0; j < irel->rd_index->indnatts; j++)
372 : {
373 935973 : if (key[i].sk_attno == irel->rd_index->indkey.values[j])
374 : {
375 622497 : key[i].sk_attno = j + 1;
376 622497 : break;
377 : }
378 : }
379 622497 : if (j == irel->rd_index->indnatts)
380 0 : elog(ERROR, "column is not in index");
381 : }
382 :
383 353490 : sysscan->iscan = index_beginscan(heapRelation, irel,
384 : snapshot, nkeys, 0);
385 353490 : index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
386 353490 : sysscan->scan = NULL;
387 : }
388 : else
389 : {
390 : /*
391 : * We disallow synchronized scans when forced to use a heapscan on a
392 : * catalog. In most cases the desired rows are near the front, so
393 : * that the unpredictable start point of a syncscan is a serious
394 : * disadvantage; and there are no compensating advantages, because
395 : * it's unlikely that such scans will occur in parallel.
396 : */
397 2361 : sysscan->scan = heap_beginscan_strat(heapRelation, snapshot,
398 : nkeys, key,
399 : true, false);
400 2361 : sysscan->iscan = NULL;
401 : }
402 :
403 355851 : return sysscan;
404 : }
405 :
406 : /*
407 : * systable_getnext --- get next tuple in a heap-or-index scan
408 : *
409 : * Returns NULL if no more tuples available.
410 : *
411 : * Note that returned tuple is a reference to data in a disk buffer;
412 : * it must not be modified, and should be presumed inaccessible after
413 : * next getnext() or endscan() call.
414 : */
415 : HeapTuple
416 631905 : systable_getnext(SysScanDesc sysscan)
417 : {
418 : HeapTuple htup;
419 :
420 631905 : if (sysscan->irel)
421 : {
422 597553 : htup = index_getnext(sysscan->iscan, ForwardScanDirection);
423 :
424 : /*
425 : * We currently don't need to support lossy index operators for any
426 : * system catalog scan. It could be done here, using the scan keys to
427 : * drive the operator calls, if we arranged to save the heap attnums
428 : * during systable_beginscan(); this is practical because we still
429 : * wouldn't need to support indexes on expressions.
430 : */
431 597553 : if (htup && sysscan->iscan->xs_recheck)
432 0 : elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
433 : }
434 : else
435 34352 : htup = heap_getnext(sysscan->scan, ForwardScanDirection);
436 :
437 631905 : return htup;
438 : }
439 :
440 : /*
441 : * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
442 : *
443 : * In particular, determine if this tuple would be visible to a catalog scan
444 : * that started now. We don't handle the case of a non-MVCC scan snapshot,
445 : * because no caller needs that yet.
446 : *
447 : * This is useful to test whether an object was deleted while we waited to
448 : * acquire lock on it.
449 : *
450 : * Note: we don't actually *need* the tuple to be passed in, but it's a
451 : * good crosscheck that the caller is interested in the right tuple.
452 : */
453 : bool
454 9494 : systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
455 : {
456 : Snapshot freshsnap;
457 : bool result;
458 :
459 : /*
460 : * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves
461 : * acquire snapshots, so we need not register the snapshot. Those
462 : * facilities are too low-level to have any business scanning tables.
463 : */
464 9494 : freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
465 :
466 9494 : if (sysscan->irel)
467 : {
468 9494 : IndexScanDesc scan = sysscan->iscan;
469 :
470 9494 : Assert(IsMVCCSnapshot(scan->xs_snapshot));
471 9494 : Assert(tup == &scan->xs_ctup);
472 9494 : Assert(BufferIsValid(scan->xs_cbuf));
473 : /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
474 9494 : LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
475 9494 : result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf);
476 9494 : LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
477 : }
478 : else
479 : {
480 0 : HeapScanDesc scan = sysscan->scan;
481 :
482 0 : Assert(IsMVCCSnapshot(scan->rs_snapshot));
483 0 : Assert(tup == &scan->rs_ctup);
484 0 : Assert(BufferIsValid(scan->rs_cbuf));
485 : /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
486 0 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
487 0 : result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf);
488 0 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
489 : }
490 9494 : return result;
491 : }
492 :
493 : /*
494 : * systable_endscan --- close scan, release resources
495 : *
496 : * Note that it's still up to the caller to close the heap relation.
497 : */
498 : void
499 355797 : systable_endscan(SysScanDesc sysscan)
500 : {
501 355797 : if (sysscan->irel)
502 : {
503 353437 : index_endscan(sysscan->iscan);
504 353437 : index_close(sysscan->irel, AccessShareLock);
505 : }
506 : else
507 2360 : heap_endscan(sysscan->scan);
508 :
509 355797 : if (sysscan->snapshot)
510 313458 : UnregisterSnapshot(sysscan->snapshot);
511 :
512 355797 : pfree(sysscan);
513 355797 : }
514 :
515 :
516 : /*
517 : * systable_beginscan_ordered --- set up for ordered catalog scan
518 : *
519 : * These routines have essentially the same API as systable_beginscan etc,
520 : * except that they guarantee to return multiple matching tuples in
521 : * index order. Also, for largely historical reasons, the index to use
522 : * is opened and locked by the caller, not here.
523 : *
524 : * Currently we do not support non-index-based scans here. (In principle
525 : * we could do a heapscan and sort, but the uses are in places that
526 : * probably don't need to still work with corrupted catalog indexes.)
527 : * For the moment, therefore, these functions are merely the thinnest of
528 : * wrappers around index_beginscan/index_getnext. The main reason for their
529 : * existence is to centralize possible future support of lossy operators
530 : * in catalog scans.
531 : */
532 : SysScanDesc
533 1096 : systable_beginscan_ordered(Relation heapRelation,
534 : Relation indexRelation,
535 : Snapshot snapshot,
536 : int nkeys, ScanKey key)
537 : {
538 : SysScanDesc sysscan;
539 : int i;
540 :
541 : /* REINDEX can probably be a hard error here ... */
542 1096 : if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
543 0 : elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed",
544 : RelationGetRelationName(indexRelation));
545 : /* ... but we only throw a warning about violating IgnoreSystemIndexes */
546 1096 : if (IgnoreSystemIndexes)
547 0 : elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
548 : RelationGetRelationName(indexRelation));
549 :
550 1096 : sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
551 :
552 1096 : sysscan->heap_rel = heapRelation;
553 1096 : sysscan->irel = indexRelation;
554 :
555 1096 : if (snapshot == NULL)
556 : {
557 536 : Oid relid = RelationGetRelid(heapRelation);
558 :
559 536 : snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
560 536 : sysscan->snapshot = snapshot;
561 : }
562 : else
563 : {
564 : /* Caller is responsible for any snapshot. */
565 560 : sysscan->snapshot = NULL;
566 : }
567 :
568 : /* Change attribute numbers to be index column numbers. */
569 2435 : for (i = 0; i < nkeys; i++)
570 : {
571 : int j;
572 :
573 1809 : for (j = 0; j < indexRelation->rd_index->indnatts; j++)
574 : {
575 1809 : if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
576 : {
577 1339 : key[i].sk_attno = j + 1;
578 1339 : break;
579 : }
580 : }
581 1339 : if (j == indexRelation->rd_index->indnatts)
582 0 : elog(ERROR, "column is not in index");
583 : }
584 :
585 1096 : sysscan->iscan = index_beginscan(heapRelation, indexRelation,
586 : snapshot, nkeys, 0);
587 1096 : index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
588 1096 : sysscan->scan = NULL;
589 :
590 1096 : return sysscan;
591 : }
592 :
593 : /*
594 : * systable_getnext_ordered --- get next tuple in an ordered catalog scan
595 : */
596 : HeapTuple
597 4611 : systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
598 : {
599 : HeapTuple htup;
600 :
601 4611 : Assert(sysscan->irel);
602 4611 : htup = index_getnext(sysscan->iscan, direction);
603 : /* See notes in systable_getnext */
604 4611 : if (htup && sysscan->iscan->xs_recheck)
605 0 : elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
606 :
607 4611 : return htup;
608 : }
609 :
610 : /*
611 : * systable_endscan_ordered --- close scan, release resources
612 : */
613 : void
614 1094 : systable_endscan_ordered(SysScanDesc sysscan)
615 : {
616 1094 : Assert(sysscan->irel);
617 1094 : index_endscan(sysscan->iscan);
618 1094 : if (sysscan->snapshot)
619 534 : UnregisterSnapshot(sysscan->snapshot);
620 1094 : pfree(sysscan);
621 1094 : }
|