|           Line data    Source code 
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nodeTidscan.c
       4             :  *    Routines to support direct tid scans of relations
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/executor/nodeTidscan.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : /*
      16             :  * INTERFACE ROUTINES
      17             :  *
      18             :  *      ExecTidScan         scans a relation using tids
      19             :  *      ExecInitTidScan     creates and initializes state info.
      20             :  *      ExecReScanTidScan   rescans the tid relation.
      21             :  *      ExecEndTidScan      releases all storage.
      22             :  */
      23             : #include "postgres.h"
      24             : 
      25             : #include "access/sysattr.h"
      26             : #include "catalog/pg_type.h"
      27             : #include "executor/execdebug.h"
      28             : #include "executor/nodeTidscan.h"
      29             : #include "miscadmin.h"
      30             : #include "optimizer/clauses.h"
      31             : #include "storage/bufmgr.h"
      32             : #include "utils/array.h"
      33             : #include "utils/rel.h"
      34             : 
      35             : 
      36             : #define IsCTIDVar(node)  \
      37             :     ((node) != NULL && \
      38             :      IsA((node), Var) && \
      39             :      ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
      40             :      ((Var *) (node))->varlevelsup == 0)
      41             : 
      42             : /* one element in tss_tidexprs */
      43             : typedef struct TidExpr
      44             : {
      45             :     ExprState  *exprstate;      /* ExprState for a TID-yielding subexpr */
      46             :     bool        isarray;        /* if true, it yields tid[] not just tid */
      47             :     CurrentOfExpr *cexpr;       /* alternatively, we can have CURRENT OF */
      48             : } TidExpr;
      49             : 
      50             : static void TidExprListCreate(TidScanState *tidstate);
      51             : static void TidListEval(TidScanState *tidstate);
      52             : static int  itemptr_comparator(const void *a, const void *b);
      53             : static TupleTableSlot *TidNext(TidScanState *node);
      54             : 
      55             : 
      56             : /*
      57             :  * Extract the qual subexpressions that yield TIDs to search for,
      58             :  * and compile them into ExprStates if they're ordinary expressions.
      59             :  *
      60             :  * CURRENT OF is a special case that we can't compile usefully;
      61             :  * just drop it into the TidExpr list as-is.
      62             :  */
      63             : static void
      64          74 : TidExprListCreate(TidScanState *tidstate)
      65             : {
      66          74 :     TidScan    *node = (TidScan *) tidstate->ss.ps.plan;
      67             :     ListCell   *l;
      68             : 
      69          74 :     tidstate->tss_tidexprs = NIL;
      70          74 :     tidstate->tss_isCurrentOf = false;
      71             : 
      72         150 :     foreach(l, node->tidquals)
      73             :     {
      74          76 :         Expr       *expr = (Expr *) lfirst(l);
      75          76 :         TidExpr    *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
      76             : 
      77          76 :         if (is_opclause(expr))
      78           6 :         {
      79             :             Node       *arg1;
      80             :             Node       *arg2;
      81             : 
      82           6 :             arg1 = get_leftop(expr);
      83           6 :             arg2 = get_rightop(expr);
      84           6 :             if (IsCTIDVar(arg1))
      85           4 :                 tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
      86             :                                                   &tidstate->ss.ps);
      87           2 :             else if (IsCTIDVar(arg2))
      88           2 :                 tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
      89             :                                                   &tidstate->ss.ps);
      90             :             else
      91           0 :                 elog(ERROR, "could not identify CTID variable");
      92           6 :             tidexpr->isarray = false;
      93             :         }
      94          70 :         else if (expr && IsA(expr, ScalarArrayOpExpr))
      95           5 :         {
      96           5 :             ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr;
      97             : 
      98           5 :             Assert(IsCTIDVar(linitial(saex->args)));
      99           5 :             tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
     100             :                                               &tidstate->ss.ps);
     101           5 :             tidexpr->isarray = true;
     102             :         }
     103          65 :         else if (expr && IsA(expr, CurrentOfExpr))
     104          65 :         {
     105          65 :             CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
     106             : 
     107          65 :             tidexpr->cexpr = cexpr;
     108          65 :             tidstate->tss_isCurrentOf = true;
     109             :         }
     110             :         else
     111           0 :             elog(ERROR, "could not identify CTID expression");
     112             : 
     113          76 :         tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
     114             :     }
     115             : 
     116             :     /* CurrentOfExpr could never appear OR'd with something else */
     117          74 :     Assert(list_length(tidstate->tss_tidexprs) == 1 ||
     118             :            !tidstate->tss_isCurrentOf);
     119          74 : }
     120             : 
     121             : /*
     122             :  * Compute the list of TIDs to be visited, by evaluating the expressions
     123             :  * for them.
     124             :  *
     125             :  * (The result is actually an array, not a list.)
     126             :  */
     127             : static void
     128          62 : TidListEval(TidScanState *tidstate)
     129             : {
     130          62 :     ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
     131             :     BlockNumber nblocks;
     132             :     ItemPointerData *tidList;
     133             :     int         numAllocTids;
     134             :     int         numTids;
     135             :     ListCell   *l;
     136             : 
     137             :     /*
     138             :      * We silently discard any TIDs that are out of range at the time of scan
     139             :      * start.  (Since we hold at least AccessShareLock on the table, it won't
     140             :      * be possible for someone to truncate away the blocks we intend to
     141             :      * visit.)
     142             :      */
     143          62 :     nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
     144             : 
     145             :     /*
     146             :      * We initialize the array with enough slots for the case that all quals
     147             :      * are simple OpExprs or CurrentOfExprs.  If there are any
     148             :      * ScalarArrayOpExprs, we may have to enlarge the array.
     149             :      */
     150          62 :     numAllocTids = list_length(tidstate->tss_tidexprs);
     151          62 :     tidList = (ItemPointerData *)
     152          62 :         palloc(numAllocTids * sizeof(ItemPointerData));
     153          62 :     numTids = 0;
     154             : 
     155         115 :     foreach(l, tidstate->tss_tidexprs)
     156             :     {
     157          63 :         TidExpr    *tidexpr = (TidExpr *) lfirst(l);
     158             :         ItemPointer itemptr;
     159             :         bool        isNull;
     160             : 
     161          63 :         if (tidexpr->exprstate && !tidexpr->isarray)
     162             :         {
     163           3 :             itemptr = (ItemPointer)
     164           3 :                 DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
     165             :                                                           econtext,
     166             :                                                           &isNull));
     167           9 :             if (!isNull &&
     168           6 :                 ItemPointerIsValid(itemptr) &&
     169           3 :                 ItemPointerGetBlockNumber(itemptr) < nblocks)
     170             :             {
     171           3 :                 if (numTids >= numAllocTids)
     172             :                 {
     173           1 :                     numAllocTids *= 2;
     174           1 :                     tidList = (ItemPointerData *)
     175           1 :                         repalloc(tidList,
     176             :                                  numAllocTids * sizeof(ItemPointerData));
     177             :                 }
     178           3 :                 tidList[numTids++] = *itemptr;
     179             :             }
     180             :         }
     181          60 :         else if (tidexpr->exprstate && tidexpr->isarray)
     182           4 :         {
     183             :             Datum       arraydatum;
     184             :             ArrayType  *itemarray;
     185             :             Datum      *ipdatums;
     186             :             bool       *ipnulls;
     187             :             int         ndatums;
     188             :             int         i;
     189             : 
     190           4 :             arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
     191             :                                                    econtext,
     192             :                                                    &isNull);
     193           4 :             if (isNull)
     194           0 :                 continue;
     195           4 :             itemarray = DatumGetArrayTypeP(arraydatum);
     196           4 :             deconstruct_array(itemarray,
     197             :                               TIDOID, sizeof(ItemPointerData), false, 's',
     198             :                               &ipdatums, &ipnulls, &ndatums);
     199           4 :             if (numTids + ndatums > numAllocTids)
     200             :             {
     201           3 :                 numAllocTids = numTids + ndatums;
     202           3 :                 tidList = (ItemPointerData *)
     203           3 :                     repalloc(tidList,
     204             :                              numAllocTids * sizeof(ItemPointerData));
     205             :             }
     206          12 :             for (i = 0; i < ndatums; i++)
     207             :             {
     208           8 :                 if (!ipnulls[i])
     209             :                 {
     210           8 :                     itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
     211          16 :                     if (ItemPointerIsValid(itemptr) &&
     212           8 :                         ItemPointerGetBlockNumber(itemptr) < nblocks)
     213           8 :                         tidList[numTids++] = *itemptr;
     214             :                 }
     215             :             }
     216           4 :             pfree(ipdatums);
     217           4 :             pfree(ipnulls);
     218             :         }
     219             :         else
     220             :         {
     221             :             ItemPointerData cursor_tid;
     222             : 
     223          56 :             Assert(tidexpr->cexpr);
     224          56 :             if (execCurrentOf(tidexpr->cexpr, econtext,
     225          56 :                               RelationGetRelid(tidstate->ss.ss_currentRelation),
     226             :                               &cursor_tid))
     227             :             {
     228          42 :                 if (numTids >= numAllocTids)
     229             :                 {
     230           0 :                     numAllocTids *= 2;
     231           0 :                     tidList = (ItemPointerData *)
     232           0 :                         repalloc(tidList,
     233             :                                  numAllocTids * sizeof(ItemPointerData));
     234             :                 }
     235          42 :                 tidList[numTids++] = cursor_tid;
     236             :             }
     237             :         }
     238             :     }
     239             : 
     240             :     /*
     241             :      * Sort the array of TIDs into order, and eliminate duplicates.
     242             :      * Eliminating duplicates is necessary since we want OR semantics across
     243             :      * the list.  Sorting makes it easier to detect duplicates, and as a bonus
     244             :      * ensures that we will visit the heap in the most efficient way.
     245             :      */
     246          52 :     if (numTids > 1)
     247             :     {
     248             :         int         lastTid;
     249             :         int         i;
     250             : 
     251             :         /* CurrentOfExpr could never appear OR'd with something else */
     252           4 :         Assert(!tidstate->tss_isCurrentOf);
     253             : 
     254           4 :         qsort((void *) tidList, numTids, sizeof(ItemPointerData),
     255             :               itemptr_comparator);
     256           4 :         lastTid = 0;
     257           9 :         for (i = 1; i < numTids; i++)
     258             :         {
     259           5 :             if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
     260           5 :                 tidList[++lastTid] = tidList[i];
     261             :         }
     262           4 :         numTids = lastTid + 1;
     263             :     }
     264             : 
     265          52 :     tidstate->tss_TidList = tidList;
     266          52 :     tidstate->tss_NumTids = numTids;
     267          52 :     tidstate->tss_TidPtr = -1;
     268          52 : }
     269             : 
     270             : /*
     271             :  * qsort comparator for ItemPointerData items
     272             :  */
     273             : static int
     274           6 : itemptr_comparator(const void *a, const void *b)
     275             : {
     276           6 :     const ItemPointerData *ipa = (const ItemPointerData *) a;
     277           6 :     const ItemPointerData *ipb = (const ItemPointerData *) b;
     278           6 :     BlockNumber ba = ItemPointerGetBlockNumber(ipa);
     279           6 :     BlockNumber bb = ItemPointerGetBlockNumber(ipb);
     280           6 :     OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
     281           6 :     OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
     282             : 
     283           6 :     if (ba < bb)
     284           0 :         return -1;
     285           6 :     if (ba > bb)
     286           0 :         return 1;
     287           6 :     if (oa < ob)
     288           4 :         return -1;
     289           2 :     if (oa > ob)
     290           2 :         return 1;
     291           0 :     return 0;
     292             : }
     293             : 
     294             : /* ----------------------------------------------------------------
     295             :  *      TidNext
     296             :  *
     297             :  *      Retrieve a tuple from the TidScan node's currentRelation
     298             :  *      using the tids in the TidScanState information.
     299             :  *
     300             :  * ----------------------------------------------------------------
     301             :  */
     302             : static TupleTableSlot *
     303         110 : TidNext(TidScanState *node)
     304             : {
     305             :     EState     *estate;
     306             :     ScanDirection direction;
     307             :     Snapshot    snapshot;
     308             :     Relation    heapRelation;
     309             :     HeapTuple   tuple;
     310             :     TupleTableSlot *slot;
     311         110 :     Buffer      buffer = InvalidBuffer;
     312             :     ItemPointerData *tidList;
     313             :     int         numTids;
     314             :     bool        bBackward;
     315             : 
     316             :     /*
     317             :      * extract necessary information from tid scan node
     318             :      */
     319         110 :     estate = node->ss.ps.state;
     320         110 :     direction = estate->es_direction;
     321         110 :     snapshot = estate->es_snapshot;
     322         110 :     heapRelation = node->ss.ss_currentRelation;
     323         110 :     slot = node->ss.ss_ScanTupleSlot;
     324             : 
     325             :     /*
     326             :      * First time through, compute the list of TIDs to be visited
     327             :      */
     328         110 :     if (node->tss_TidList == NULL)
     329          62 :         TidListEval(node);
     330             : 
     331         100 :     tidList = node->tss_TidList;
     332         100 :     numTids = node->tss_NumTids;
     333             : 
     334             :     /*
     335             :      * We use node->tss_htup as the tuple pointer; note this can't just be a
     336             :      * local variable here, as the scan tuple slot will keep a pointer to it.
     337             :      */
     338         100 :     tuple = &(node->tss_htup);
     339             : 
     340             :     /*
     341             :      * Initialize or advance scan position, depending on direction.
     342             :      */
     343         100 :     bBackward = ScanDirectionIsBackward(direction);
     344         100 :     if (bBackward)
     345             :     {
     346           1 :         if (node->tss_TidPtr < 0)
     347             :         {
     348             :             /* initialize for backward scan */
     349           0 :             node->tss_TidPtr = numTids - 1;
     350             :         }
     351             :         else
     352           1 :             node->tss_TidPtr--;
     353             :     }
     354             :     else
     355             :     {
     356          99 :         if (node->tss_TidPtr < 0)
     357             :         {
     358             :             /* initialize for forward scan */
     359          52 :             node->tss_TidPtr = 0;
     360             :         }
     361             :         else
     362          47 :             node->tss_TidPtr++;
     363             :     }
     364             : 
     365         204 :     while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
     366             :     {
     367          53 :         tuple->t_self = tidList[node->tss_TidPtr];
     368             : 
     369             :         /*
     370             :          * For WHERE CURRENT OF, the tuple retrieved from the cursor might
     371             :          * since have been updated; if so, we should fetch the version that is
     372             :          * current according to our snapshot.
     373             :          */
     374          53 :         if (node->tss_isCurrentOf)
     375          42 :             heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self);
     376             : 
     377          53 :         if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
     378             :         {
     379             :             /*
     380             :              * store the scanned tuple in the scan tuple slot of the scan
     381             :              * state.  Eventually we will only do this and not return a tuple.
     382             :              * Note: we pass 'false' because tuples returned by amgetnext are
     383             :              * pointers onto disk pages and were not created with palloc() and
     384             :              * so should not be pfree()'d.
     385             :              */
     386          49 :             ExecStoreTuple(tuple,   /* tuple to store */
     387             :                            slot,    /* slot to store in */
     388             :                            buffer,  /* buffer associated with tuple  */
     389             :                            false);  /* don't pfree */
     390             : 
     391             :             /*
     392             :              * At this point we have an extra pin on the buffer, because
     393             :              * ExecStoreTuple incremented the pin count. Drop our local pin.
     394             :              */
     395          49 :             ReleaseBuffer(buffer);
     396             : 
     397          49 :             return slot;
     398             :         }
     399             :         /* Bad TID or failed snapshot qual; try next */
     400           4 :         if (bBackward)
     401           0 :             node->tss_TidPtr--;
     402             :         else
     403           4 :             node->tss_TidPtr++;
     404             : 
     405           4 :         CHECK_FOR_INTERRUPTS();
     406             :     }
     407             : 
     408             :     /*
     409             :      * if we get here it means the tid scan failed so we are at the end of the
     410             :      * scan..
     411             :      */
     412          51 :     return ExecClearTuple(slot);
     413             : }
     414             : 
     415             : /*
     416             :  * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
     417             :  */
     418             : static bool
     419           0 : TidRecheck(TidScanState *node, TupleTableSlot *slot)
     420             : {
     421             :     /*
     422             :      * XXX shouldn't we check here to make sure tuple matches TID list? In
     423             :      * runtime-key case this is not certain, is it?  However, in the WHERE
     424             :      * CURRENT OF case it might not match anyway ...
     425             :      */
     426           0 :     return true;
     427             : }
     428             : 
     429             : 
     430             : /* ----------------------------------------------------------------
     431             :  *      ExecTidScan(node)
     432             :  *
     433             :  *      Scans the relation using tids and returns
     434             :  *         the next qualifying tuple in the direction specified.
     435             :  *      We call the ExecScan() routine and pass it the appropriate
     436             :  *      access method functions.
     437             :  *
     438             :  *      Conditions:
     439             :  *        -- the "cursor" maintained by the AMI is positioned at the tuple
     440             :  *           returned previously.
     441             :  *
     442             :  *      Initial States:
     443             :  *        -- the relation indicated is opened for scanning so that the
     444             :  *           "cursor" is positioned before the first qualifying tuple.
     445             :  *        -- tidPtr is -1.
     446             :  * ----------------------------------------------------------------
     447             :  */
     448             : static TupleTableSlot *
     449         107 : ExecTidScan(PlanState *pstate)
     450             : {
     451         107 :     TidScanState *node = castNode(TidScanState, pstate);
     452             : 
     453         107 :     return ExecScan(&node->ss,
     454             :                     (ExecScanAccessMtd) TidNext,
     455             :                     (ExecScanRecheckMtd) TidRecheck);
     456             : }
     457             : 
     458             : /* ----------------------------------------------------------------
     459             :  *      ExecReScanTidScan(node)
     460             :  * ----------------------------------------------------------------
     461             :  */
     462             : void
     463           1 : ExecReScanTidScan(TidScanState *node)
     464             : {
     465           1 :     if (node->tss_TidList)
     466           1 :         pfree(node->tss_TidList);
     467           1 :     node->tss_TidList = NULL;
     468           1 :     node->tss_NumTids = 0;
     469           1 :     node->tss_TidPtr = -1;
     470             : 
     471           1 :     ExecScanReScan(&node->ss);
     472           1 : }
     473             : 
     474             : /* ----------------------------------------------------------------
     475             :  *      ExecEndTidScan
     476             :  *
     477             :  *      Releases any storage allocated through C routines.
     478             :  *      Returns nothing.
     479             :  * ----------------------------------------------------------------
     480             :  */
     481             : void
     482          55 : ExecEndTidScan(TidScanState *node)
     483             : {
     484             :     /*
     485             :      * Free the exprcontext
     486             :      */
     487          55 :     ExecFreeExprContext(&node->ss.ps);
     488             : 
     489             :     /*
     490             :      * clear out tuple table slots
     491             :      */
     492          55 :     ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
     493          55 :     ExecClearTuple(node->ss.ss_ScanTupleSlot);
     494             : 
     495             :     /*
     496             :      * close the heap relation.
     497             :      */
     498          55 :     ExecCloseScanRelation(node->ss.ss_currentRelation);
     499          55 : }
     500             : 
     501             : /* ----------------------------------------------------------------
     502             :  *      ExecInitTidScan
     503             :  *
     504             :  *      Initializes the tid scan's state information, creates
     505             :  *      scan keys, and opens the base and tid relations.
     506             :  *
     507             :  *      Parameters:
     508             :  *        node: TidNode node produced by the planner.
     509             :  *        estate: the execution state initialized in InitPlan.
     510             :  * ----------------------------------------------------------------
     511             :  */
     512             : TidScanState *
     513          74 : ExecInitTidScan(TidScan *node, EState *estate, int eflags)
     514             : {
     515             :     TidScanState *tidstate;
     516             :     Relation    currentRelation;
     517             : 
     518             :     /*
     519             :      * create state structure
     520             :      */
     521          74 :     tidstate = makeNode(TidScanState);
     522          74 :     tidstate->ss.ps.plan = (Plan *) node;
     523          74 :     tidstate->ss.ps.state = estate;
     524          74 :     tidstate->ss.ps.ExecProcNode = ExecTidScan;
     525             : 
     526             :     /*
     527             :      * Miscellaneous initialization
     528             :      *
     529             :      * create expression context for node
     530             :      */
     531          74 :     ExecAssignExprContext(estate, &tidstate->ss.ps);
     532             : 
     533             :     /*
     534             :      * initialize child expressions
     535             :      */
     536          74 :     tidstate->ss.ps.qual =
     537          74 :         ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
     538             : 
     539          74 :     TidExprListCreate(tidstate);
     540             : 
     541             :     /*
     542             :      * tuple table initialization
     543             :      */
     544          74 :     ExecInitResultTupleSlot(estate, &tidstate->ss.ps);
     545          74 :     ExecInitScanTupleSlot(estate, &tidstate->ss);
     546             : 
     547             :     /*
     548             :      * mark tid list as not computed yet
     549             :      */
     550          74 :     tidstate->tss_TidList = NULL;
     551          74 :     tidstate->tss_NumTids = 0;
     552          74 :     tidstate->tss_TidPtr = -1;
     553             : 
     554             :     /*
     555             :      * open the base relation and acquire appropriate lock on it.
     556             :      */
     557          74 :     currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
     558             : 
     559          74 :     tidstate->ss.ss_currentRelation = currentRelation;
     560          74 :     tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
     561             : 
     562             :     /*
     563             :      * get the scan type from the relation descriptor.
     564             :      */
     565          74 :     ExecAssignScanType(&tidstate->ss, RelationGetDescr(currentRelation));
     566             : 
     567             :     /*
     568             :      * Initialize result tuple type and projection info.
     569             :      */
     570          74 :     ExecAssignResultTypeFromTL(&tidstate->ss.ps);
     571          74 :     ExecAssignScanProjectionInfo(&tidstate->ss);
     572             : 
     573             :     /*
     574             :      * all done.
     575             :      */
     576          74 :     return tidstate;
     577             : }
 |