Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * partition.c
4 : * Partitioning related data structures and functions.
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/catalog/partition.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/heapam.h"
19 : #include "access/htup_details.h"
20 : #include "access/nbtree.h"
21 : #include "access/sysattr.h"
22 : #include "catalog/dependency.h"
23 : #include "catalog/indexing.h"
24 : #include "catalog/objectaddress.h"
25 : #include "catalog/partition.h"
26 : #include "catalog/pg_collation.h"
27 : #include "catalog/pg_inherits.h"
28 : #include "catalog/pg_inherits_fn.h"
29 : #include "catalog/pg_opclass.h"
30 : #include "catalog/pg_type.h"
31 : #include "executor/executor.h"
32 : #include "miscadmin.h"
33 : #include "nodes/makefuncs.h"
34 : #include "nodes/nodeFuncs.h"
35 : #include "nodes/parsenodes.h"
36 : #include "optimizer/clauses.h"
37 : #include "optimizer/planmain.h"
38 : #include "optimizer/var.h"
39 : #include "rewrite/rewriteManip.h"
40 : #include "storage/lmgr.h"
41 : #include "utils/array.h"
42 : #include "utils/builtins.h"
43 : #include "utils/datum.h"
44 : #include "utils/memutils.h"
45 : #include "utils/fmgroids.h"
46 : #include "utils/inval.h"
47 : #include "utils/lsyscache.h"
48 : #include "utils/rel.h"
49 : #include "utils/ruleutils.h"
50 : #include "utils/syscache.h"
51 :
52 : /*
53 : * Information about bounds of a partitioned relation
54 : *
55 : * A list partition datum that is known to be NULL is never put into the
56 : * datums array. Instead, it is tracked using the null_index field.
57 : *
58 : * In the case of range partitioning, ndatums will typically be far less than
59 : * 2 * nparts, because a partition's upper bound and the next partition's lower
60 : * bound are the same in most common cases, and we only store one of them (the
61 : * upper bound).
62 : *
63 : * In the case of list partitioning, the indexes array stores one entry for
64 : * every datum, which is the index of the partition that accepts a given datum.
65 : * In case of range partitioning, it stores one entry per distinct range
66 : * datum, which is the index of the partition for which a given datum
67 : * is an upper bound.
68 : */
69 :
70 : typedef struct PartitionBoundInfoData
71 : {
72 : char strategy; /* list or range bounds? */
73 : int ndatums; /* Length of the datums following array */
74 : Datum **datums; /* Array of datum-tuples with key->partnatts
75 : * datums each */
76 : PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
77 : * NULL for list partitioned tables */
78 : int *indexes; /* Partition indexes; one entry per member of
79 : * the datums array (plus one if range
80 : * partitioned table) */
81 : int null_index; /* Index of the null-accepting partition; -1
82 : * if there isn't one */
83 : } PartitionBoundInfoData;
84 :
85 : #define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
86 :
87 : /*
88 : * When qsort'ing partition bounds after reading from the catalog, each bound
89 : * is represented with one of the following structs.
90 : */
91 :
92 : /* One value coming from some (index'th) list partition */
93 : typedef struct PartitionListValue
94 : {
95 : int index;
96 : Datum value;
97 : } PartitionListValue;
98 :
99 : /* One bound of a range partition */
100 : typedef struct PartitionRangeBound
101 : {
102 : int index;
103 : Datum *datums; /* range bound datums */
104 : PartitionRangeDatumKind *kind; /* the kind of each datum */
105 : bool lower; /* this is the lower (vs upper) bound */
106 : } PartitionRangeBound;
107 :
108 : static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
109 : void *arg);
110 : static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
111 : void *arg);
112 :
113 : static Oid get_partition_operator(PartitionKey key, int col,
114 : StrategyNumber strategy, bool *need_relabel);
115 : static Expr *make_partition_op_expr(PartitionKey key, int keynum,
116 : uint16 strategy, Expr *arg1, Expr *arg2);
117 : static void get_range_key_properties(PartitionKey key, int keynum,
118 : PartitionRangeDatum *ldatum,
119 : PartitionRangeDatum *udatum,
120 : ListCell **partexprs_item,
121 : Expr **keyCol,
122 : Const **lower_val, Const **upper_val);
123 : static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
124 : static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
125 : static List *generate_partition_qual(Relation rel);
126 :
127 : static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
128 : List *datums, bool lower);
129 : static int32 partition_rbound_cmp(PartitionKey key,
130 : Datum *datums1, PartitionRangeDatumKind *kind1,
131 : bool lower1, PartitionRangeBound *b2);
132 : static int32 partition_rbound_datum_cmp(PartitionKey key,
133 : Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
134 : Datum *tuple_datums);
135 :
136 : static int32 partition_bound_cmp(PartitionKey key,
137 : PartitionBoundInfo boundinfo,
138 : int offset, void *probe, bool probe_is_bound);
139 : static int partition_bound_bsearch(PartitionKey key,
140 : PartitionBoundInfo boundinfo,
141 : void *probe, bool probe_is_bound, bool *is_equal);
142 :
143 : /*
144 : * RelationBuildPartitionDesc
145 : * Form rel's partition descriptor
146 : *
147 : * Not flushed from the cache by RelationClearRelation() unless changed because
148 : * of addition or removal of partition.
149 : */
150 : void
151 819 : RelationBuildPartitionDesc(Relation rel)
152 : {
153 : List *inhoids,
154 : *partoids;
155 819 : Oid *oids = NULL;
156 819 : List *boundspecs = NIL;
157 : ListCell *cell;
158 : int i,
159 : nparts;
160 819 : PartitionKey key = RelationGetPartitionKey(rel);
161 : PartitionDesc result;
162 : MemoryContext oldcxt;
163 :
164 819 : int ndatums = 0;
165 :
166 : /* List partitioning specific */
167 819 : PartitionListValue **all_values = NULL;
168 819 : int null_index = -1;
169 :
170 : /* Range partitioning specific */
171 819 : PartitionRangeBound **rbounds = NULL;
172 :
173 : /*
174 : * The following could happen in situations where rel has a pg_class entry
175 : * but not the pg_partitioned_table entry yet.
176 : */
177 819 : if (key == NULL)
178 925 : return;
179 :
180 : /* Get partition oids from pg_inherits */
181 713 : inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
182 :
183 : /* Collect bound spec nodes in a list */
184 713 : i = 0;
185 713 : partoids = NIL;
186 1614 : foreach(cell, inhoids)
187 : {
188 901 : Oid inhrelid = lfirst_oid(cell);
189 : HeapTuple tuple;
190 : Datum datum;
191 : bool isnull;
192 : Node *boundspec;
193 :
194 901 : tuple = SearchSysCache1(RELOID, inhrelid);
195 901 : if (!HeapTupleIsValid(tuple))
196 0 : elog(ERROR, "cache lookup failed for relation %u", inhrelid);
197 :
198 : /*
199 : * It is possible that the pg_class tuple of a partition has not been
200 : * updated yet to set its relpartbound field. The only case where
201 : * this happens is when we open the parent relation to check using its
202 : * partition descriptor that a new partition's bound does not overlap
203 : * some existing partition.
204 : */
205 901 : if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
206 : {
207 135 : ReleaseSysCache(tuple);
208 135 : continue;
209 : }
210 :
211 766 : datum = SysCacheGetAttr(RELOID, tuple,
212 : Anum_pg_class_relpartbound,
213 : &isnull);
214 766 : Assert(!isnull);
215 766 : boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
216 766 : boundspecs = lappend(boundspecs, boundspec);
217 766 : partoids = lappend_oid(partoids, inhrelid);
218 766 : ReleaseSysCache(tuple);
219 : }
220 :
221 713 : nparts = list_length(partoids);
222 :
223 713 : if (nparts > 0)
224 : {
225 367 : oids = (Oid *) palloc(nparts * sizeof(Oid));
226 367 : i = 0;
227 1133 : foreach(cell, partoids)
228 766 : oids[i++] = lfirst_oid(cell);
229 :
230 : /* Convert from node to the internal representation */
231 367 : if (key->strategy == PARTITION_STRATEGY_LIST)
232 : {
233 200 : List *non_null_values = NIL;
234 :
235 : /*
236 : * Create a unified list of non-null values across all partitions.
237 : */
238 200 : i = 0;
239 200 : null_index = -1;
240 561 : foreach(cell, boundspecs)
241 : {
242 361 : PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
243 : lfirst(cell));
244 : ListCell *c;
245 :
246 361 : if (spec->strategy != PARTITION_STRATEGY_LIST)
247 0 : elog(ERROR, "invalid strategy in partition bound spec");
248 :
249 790 : foreach(c, spec->listdatums)
250 : {
251 429 : Const *val = castNode(Const, lfirst(c));
252 429 : PartitionListValue *list_value = NULL;
253 :
254 429 : if (!val->constisnull)
255 : {
256 408 : list_value = (PartitionListValue *)
257 : palloc0(sizeof(PartitionListValue));
258 408 : list_value->index = i;
259 408 : list_value->value = val->constvalue;
260 : }
261 : else
262 : {
263 : /*
264 : * Never put a null into the values array, flag
265 : * instead for the code further down below where we
266 : * construct the actual relcache struct.
267 : */
268 21 : if (null_index != -1)
269 0 : elog(ERROR, "found null more than once");
270 21 : null_index = i;
271 : }
272 :
273 429 : if (list_value)
274 408 : non_null_values = lappend(non_null_values,
275 : list_value);
276 : }
277 :
278 361 : i++;
279 : }
280 :
281 200 : ndatums = list_length(non_null_values);
282 :
283 : /*
284 : * Collect all list values in one array. Alongside the value, we
285 : * also save the index of partition the value comes from.
286 : */
287 200 : all_values = (PartitionListValue **) palloc(ndatums *
288 : sizeof(PartitionListValue *));
289 200 : i = 0;
290 608 : foreach(cell, non_null_values)
291 : {
292 408 : PartitionListValue *src = lfirst(cell);
293 :
294 816 : all_values[i] = (PartitionListValue *)
295 408 : palloc(sizeof(PartitionListValue));
296 408 : all_values[i]->value = src->value;
297 408 : all_values[i]->index = src->index;
298 408 : i++;
299 : }
300 :
301 200 : qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
302 : qsort_partition_list_value_cmp, (void *) key);
303 : }
304 167 : else if (key->strategy == PARTITION_STRATEGY_RANGE)
305 : {
306 : int k;
307 : PartitionRangeBound **all_bounds,
308 : *prev;
309 :
310 167 : all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
311 : sizeof(PartitionRangeBound *));
312 :
313 : /*
314 : * Create a unified list of range bounds across all the
315 : * partitions.
316 : */
317 167 : i = ndatums = 0;
318 572 : foreach(cell, boundspecs)
319 : {
320 405 : PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
321 : lfirst(cell));
322 : PartitionRangeBound *lower,
323 : *upper;
324 :
325 405 : if (spec->strategy != PARTITION_STRATEGY_RANGE)
326 0 : elog(ERROR, "invalid strategy in partition bound spec");
327 :
328 405 : lower = make_one_range_bound(key, i, spec->lowerdatums,
329 : true);
330 405 : upper = make_one_range_bound(key, i, spec->upperdatums,
331 : false);
332 405 : all_bounds[ndatums++] = lower;
333 405 : all_bounds[ndatums++] = upper;
334 405 : i++;
335 : }
336 :
337 167 : Assert(ndatums == nparts * 2);
338 :
339 : /* Sort all the bounds in ascending order */
340 167 : qsort_arg(all_bounds, 2 * nparts,
341 : sizeof(PartitionRangeBound *),
342 : qsort_partition_rbound_cmp,
343 : (void *) key);
344 :
345 : /* Save distinct bounds from all_bounds into rbounds. */
346 167 : rbounds = (PartitionRangeBound **)
347 167 : palloc(ndatums * sizeof(PartitionRangeBound *));
348 167 : k = 0;
349 167 : prev = NULL;
350 977 : for (i = 0; i < ndatums; i++)
351 : {
352 810 : PartitionRangeBound *cur = all_bounds[i];
353 810 : bool is_distinct = false;
354 : int j;
355 :
356 : /* Is the current bound distinct from the previous one? */
357 1264 : for (j = 0; j < key->partnatts; j++)
358 : {
359 : Datum cmpval;
360 :
361 1137 : if (prev == NULL || cur->kind[j] != prev->kind[j])
362 : {
363 276 : is_distinct = true;
364 276 : break;
365 : }
366 :
367 : /*
368 : * If the bounds are both MINVALUE or MAXVALUE, stop now
369 : * and treat them as equal, since any values after this
370 : * point must be ignored.
371 : */
372 861 : if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
373 37 : break;
374 :
375 2472 : cmpval = FunctionCall2Coll(&key->partsupfunc[j],
376 824 : key->partcollation[j],
377 824 : cur->datums[j],
378 824 : prev->datums[j]);
379 824 : if (DatumGetInt32(cmpval) != 0)
380 : {
381 370 : is_distinct = true;
382 370 : break;
383 : }
384 : }
385 :
386 : /*
387 : * Only if the bound is distinct save it into a temporary
388 : * array i.e. rbounds which is later copied into boundinfo
389 : * datums array.
390 : */
391 810 : if (is_distinct)
392 646 : rbounds[k++] = all_bounds[i];
393 :
394 810 : prev = cur;
395 : }
396 :
397 : /* Update ndatums to hold the count of distinct datums. */
398 167 : ndatums = k;
399 : }
400 : else
401 0 : elog(ERROR, "unexpected partition strategy: %d",
402 : (int) key->strategy);
403 : }
404 :
405 : /* Now build the actual relcache partition descriptor */
406 713 : rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
407 713 : RelationGetRelationName(rel),
408 : ALLOCSET_DEFAULT_SIZES);
409 713 : oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
410 :
411 713 : result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
412 713 : result->nparts = nparts;
413 713 : if (nparts > 0)
414 : {
415 : PartitionBoundInfo boundinfo;
416 : int *mapping;
417 367 : int next_index = 0;
418 :
419 367 : result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
420 :
421 367 : boundinfo = (PartitionBoundInfoData *)
422 : palloc0(sizeof(PartitionBoundInfoData));
423 367 : boundinfo->strategy = key->strategy;
424 367 : boundinfo->ndatums = ndatums;
425 367 : boundinfo->null_index = -1;
426 367 : boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
427 :
428 : /* Initialize mapping array with invalid values */
429 367 : mapping = (int *) palloc(sizeof(int) * nparts);
430 1133 : for (i = 0; i < nparts; i++)
431 766 : mapping[i] = -1;
432 :
433 367 : switch (key->strategy)
434 : {
435 : case PARTITION_STRATEGY_LIST:
436 : {
437 200 : boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
438 :
439 : /*
440 : * Copy values. Indexes of individual values are mapped
441 : * to canonical values so that they match for any two list
442 : * partitioned tables with same number of partitions and
443 : * same lists per partition. One way to canonicalize is
444 : * to assign the index in all_values[] of the smallest
445 : * value of each partition, as the index of all of the
446 : * partition's values.
447 : */
448 608 : for (i = 0; i < ndatums; i++)
449 : {
450 408 : boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
451 1224 : boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
452 408 : key->parttypbyval[0],
453 408 : key->parttyplen[0]);
454 :
455 : /* If the old index has no mapping, assign one */
456 408 : if (mapping[all_values[i]->index] == -1)
457 353 : mapping[all_values[i]->index] = next_index++;
458 :
459 408 : boundinfo->indexes[i] = mapping[all_values[i]->index];
460 : }
461 :
462 : /*
463 : * If null-accepting partition has no mapped index yet,
464 : * assign one. This could happen if such partition
465 : * accepts only null and hence not covered in the above
466 : * loop which only handled non-null values.
467 : */
468 200 : if (null_index != -1)
469 : {
470 21 : Assert(null_index >= 0);
471 21 : if (mapping[null_index] == -1)
472 8 : mapping[null_index] = next_index++;
473 21 : boundinfo->null_index = mapping[null_index];
474 : }
475 :
476 : /* All partition must now have a valid mapping */
477 200 : Assert(next_index == nparts);
478 200 : break;
479 : }
480 :
481 : case PARTITION_STRATEGY_RANGE:
482 : {
483 167 : boundinfo->kind = (PartitionRangeDatumKind **)
484 167 : palloc(ndatums *
485 : sizeof(PartitionRangeDatumKind *));
486 167 : boundinfo->indexes = (int *) palloc((ndatums + 1) *
487 : sizeof(int));
488 :
489 813 : for (i = 0; i < ndatums; i++)
490 : {
491 : int j;
492 :
493 646 : boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
494 : sizeof(Datum));
495 1292 : boundinfo->kind[i] = (PartitionRangeDatumKind *)
496 646 : palloc(key->partnatts *
497 : sizeof(PartitionRangeDatumKind));
498 1857 : for (j = 0; j < key->partnatts; j++)
499 : {
500 1211 : if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
501 2026 : boundinfo->datums[i][j] =
502 2026 : datumCopy(rbounds[i]->datums[j],
503 1013 : key->parttypbyval[j],
504 1013 : key->parttyplen[j]);
505 1211 : boundinfo->kind[i][j] = rbounds[i]->kind[j];
506 : }
507 :
508 : /*
509 : * There is no mapping for invalid indexes.
510 : *
511 : * Any lower bounds in the rbounds array have invalid
512 : * indexes assigned, because the values between the
513 : * previous bound (if there is one) and this (lower)
514 : * bound are not part of the range of any existing
515 : * partition.
516 : */
517 646 : if (rbounds[i]->lower)
518 241 : boundinfo->indexes[i] = -1;
519 : else
520 : {
521 405 : int orig_index = rbounds[i]->index;
522 :
523 : /* If the old index has no mapping, assign one */
524 405 : if (mapping[orig_index] == -1)
525 405 : mapping[orig_index] = next_index++;
526 :
527 405 : boundinfo->indexes[i] = mapping[orig_index];
528 : }
529 : }
530 167 : boundinfo->indexes[i] = -1;
531 167 : break;
532 : }
533 :
534 : default:
535 0 : elog(ERROR, "unexpected partition strategy: %d",
536 : (int) key->strategy);
537 : }
538 :
539 367 : result->boundinfo = boundinfo;
540 :
541 : /*
542 : * Now assign OIDs from the original array into mapped indexes of the
543 : * result array. Order of OIDs in the former is defined by the
544 : * catalog scan that retrieved them, whereas that in the latter is
545 : * defined by canonicalized representation of the list values or the
546 : * range bounds.
547 : */
548 1133 : for (i = 0; i < nparts; i++)
549 766 : result->oids[mapping[i]] = oids[i];
550 367 : pfree(mapping);
551 : }
552 :
553 713 : MemoryContextSwitchTo(oldcxt);
554 713 : rel->rd_partdesc = result;
555 : }
556 :
557 : /*
558 : * Are two partition bound collections logically equal?
559 : *
560 : * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
561 : * This is also useful when b1 and b2 are bound collections of two separate
562 : * relations, respectively, because PartitionBoundInfo is a canonical
563 : * representation of partition bounds.
564 : */
565 : bool
566 32 : partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
567 : PartitionBoundInfo b1, PartitionBoundInfo b2)
568 : {
569 : int i;
570 :
571 32 : if (b1->strategy != b2->strategy)
572 0 : return false;
573 :
574 32 : if (b1->ndatums != b2->ndatums)
575 0 : return false;
576 :
577 32 : if (b1->null_index != b2->null_index)
578 0 : return false;
579 :
580 106 : for (i = 0; i < b1->ndatums; i++)
581 : {
582 : int j;
583 :
584 168 : for (j = 0; j < partnatts; j++)
585 : {
586 : /* For range partitions, the bounds might not be finite. */
587 94 : if (b1->kind != NULL)
588 : {
589 : /* The different kinds of bound all differ from each other */
590 59 : if (b1->kind[i][j] != b2->kind[i][j])
591 0 : return false;
592 :
593 : /* Non-finite bounds are equal without further examination. */
594 59 : if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
595 0 : continue;
596 : }
597 :
598 : /*
599 : * Compare the actual values. Note that it would be both incorrect
600 : * and unsafe to invoke the comparison operator derived from the
601 : * partitioning specification here. It would be incorrect because
602 : * we want the relcache entry to be updated for ANY change to the
603 : * partition bounds, not just those that the partitioning operator
604 : * thinks are significant. It would be unsafe because we might
605 : * reach this code in the context of an aborted transaction, and
606 : * an arbitrary partitioning operator might not be safe in that
607 : * context. datumIsEqual() should be simple enough to be safe.
608 : */
609 188 : if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
610 188 : parttypbyval[j], parttyplen[j]))
611 0 : return false;
612 : }
613 :
614 74 : if (b1->indexes[i] != b2->indexes[i])
615 0 : return false;
616 : }
617 :
618 : /* There are ndatums+1 indexes in case of range partitions */
619 43 : if (b1->strategy == PARTITION_STRATEGY_RANGE &&
620 11 : b1->indexes[i] != b2->indexes[i])
621 0 : return false;
622 :
623 32 : return true;
624 : }
625 :
626 : /*
627 : * check_new_partition_bound
628 : *
629 : * Checks if the new partition's bound overlaps any of the existing partitions
630 : * of parent. Also performs additional checks as necessary per strategy.
631 : */
632 : void
633 162 : check_new_partition_bound(char *relname, Relation parent,
634 : PartitionBoundSpec *spec)
635 : {
636 162 : PartitionKey key = RelationGetPartitionKey(parent);
637 162 : PartitionDesc partdesc = RelationGetPartitionDesc(parent);
638 162 : ParseState *pstate = make_parsestate(NULL);
639 162 : int with = -1;
640 162 : bool overlap = false;
641 :
642 162 : switch (key->strategy)
643 : {
644 : case PARTITION_STRATEGY_LIST:
645 : {
646 78 : Assert(spec->strategy == PARTITION_STRATEGY_LIST);
647 :
648 78 : if (partdesc->nparts > 0)
649 : {
650 39 : PartitionBoundInfo boundinfo = partdesc->boundinfo;
651 : ListCell *cell;
652 :
653 39 : Assert(boundinfo &&
654 : boundinfo->strategy == PARTITION_STRATEGY_LIST &&
655 : (boundinfo->ndatums > 0 ||
656 : partition_bound_accepts_nulls(boundinfo)));
657 :
658 81 : foreach(cell, spec->listdatums)
659 : {
660 46 : Const *val = castNode(Const, lfirst(cell));
661 :
662 46 : if (!val->constisnull)
663 : {
664 : int offset;
665 : bool equal;
666 :
667 41 : offset = partition_bound_bsearch(key, boundinfo,
668 41 : &val->constvalue,
669 : true, &equal);
670 41 : if (offset >= 0 && equal)
671 : {
672 3 : overlap = true;
673 3 : with = boundinfo->indexes[offset];
674 3 : break;
675 : }
676 : }
677 5 : else if (partition_bound_accepts_nulls(boundinfo))
678 : {
679 1 : overlap = true;
680 1 : with = boundinfo->null_index;
681 1 : break;
682 : }
683 : }
684 : }
685 :
686 78 : break;
687 : }
688 :
689 : case PARTITION_STRATEGY_RANGE:
690 : {
691 : PartitionRangeBound *lower,
692 : *upper;
693 :
694 84 : Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
695 84 : lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
696 84 : upper = make_one_range_bound(key, -1, spec->upperdatums, false);
697 :
698 : /*
699 : * First check if the resulting range would be empty with
700 : * specified lower and upper bounds
701 : */
702 84 : if (partition_rbound_cmp(key, lower->datums, lower->kind, true,
703 : upper) >= 0)
704 : {
705 2 : ereport(ERROR,
706 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
707 : errmsg("empty range bound specified for partition \"%s\"",
708 : relname),
709 : errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
710 : get_range_partbound_string(spec->lowerdatums),
711 : get_range_partbound_string(spec->upperdatums)),
712 : parser_errposition(pstate, spec->location)));
713 : }
714 :
715 82 : if (partdesc->nparts > 0)
716 : {
717 54 : PartitionBoundInfo boundinfo = partdesc->boundinfo;
718 : int offset;
719 : bool equal;
720 :
721 54 : Assert(boundinfo && boundinfo->ndatums > 0 &&
722 : boundinfo->strategy == PARTITION_STRATEGY_RANGE);
723 :
724 : /*
725 : * Test whether the new lower bound (which is treated
726 : * inclusively as part of the new partition) lies inside
727 : * an existing partition, or in a gap.
728 : *
729 : * If it's inside an existing partition, the bound at
730 : * offset + 1 will be the upper bound of that partition,
731 : * and its index will be >= 0.
732 : *
733 : * If it's in a gap, the bound at offset + 1 will be the
734 : * lower bound of the next partition, and its index will
735 : * be -1. This is also true if there is no next partition,
736 : * since the index array is initialised with an extra -1
737 : * at the end.
738 : */
739 54 : offset = partition_bound_bsearch(key, boundinfo, lower,
740 : true, &equal);
741 :
742 54 : if (boundinfo->indexes[offset + 1] < 0)
743 : {
744 : /*
745 : * Check that the new partition will fit in the gap.
746 : * For it to fit, the new upper bound must be less
747 : * than or equal to the lower bound of the next
748 : * partition, if there is one.
749 : */
750 49 : if (offset + 1 < boundinfo->ndatums)
751 : {
752 : int32 cmpval;
753 :
754 2 : cmpval = partition_bound_cmp(key, boundinfo,
755 : offset + 1, upper,
756 : true);
757 2 : if (cmpval < 0)
758 : {
759 : /*
760 : * The new partition overlaps with the
761 : * existing partition between offset + 1 and
762 : * offset + 2.
763 : */
764 2 : overlap = true;
765 2 : with = boundinfo->indexes[offset + 2];
766 : }
767 : }
768 : }
769 : else
770 : {
771 : /*
772 : * The new partition overlaps with the existing
773 : * partition between offset and offset + 1.
774 : */
775 5 : overlap = true;
776 5 : with = boundinfo->indexes[offset + 1];
777 : }
778 : }
779 :
780 82 : break;
781 : }
782 :
783 : default:
784 0 : elog(ERROR, "unexpected partition strategy: %d",
785 : (int) key->strategy);
786 : }
787 :
788 160 : if (overlap)
789 : {
790 11 : Assert(with >= 0);
791 11 : ereport(ERROR,
792 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
793 : errmsg("partition \"%s\" would overlap partition \"%s\"",
794 : relname, get_rel_name(partdesc->oids[with])),
795 : parser_errposition(pstate, spec->location)));
796 : }
797 149 : }
798 :
799 : /*
800 : * get_partition_parent
801 : *
802 : * Returns inheritance parent of a partition by scanning pg_inherits
803 : *
804 : * Note: Because this function assumes that the relation whose OID is passed
805 : * as an argument will have precisely one parent, it should only be called
806 : * when it is known that the relation is a partition.
807 : */
808 : Oid
809 282 : get_partition_parent(Oid relid)
810 : {
811 : Form_pg_inherits form;
812 : Relation catalogRelation;
813 : SysScanDesc scan;
814 : ScanKeyData key[2];
815 : HeapTuple tuple;
816 : Oid result;
817 :
818 282 : catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
819 :
820 282 : ScanKeyInit(&key[0],
821 : Anum_pg_inherits_inhrelid,
822 : BTEqualStrategyNumber, F_OIDEQ,
823 : ObjectIdGetDatum(relid));
824 282 : ScanKeyInit(&key[1],
825 : Anum_pg_inherits_inhseqno,
826 : BTEqualStrategyNumber, F_INT4EQ,
827 : Int32GetDatum(1));
828 :
829 282 : scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
830 : NULL, 2, key);
831 :
832 282 : tuple = systable_getnext(scan);
833 282 : if (!HeapTupleIsValid(tuple))
834 0 : elog(ERROR, "could not find tuple for parent of relation %u", relid);
835 :
836 282 : form = (Form_pg_inherits) GETSTRUCT(tuple);
837 282 : result = form->inhparent;
838 :
839 282 : systable_endscan(scan);
840 282 : heap_close(catalogRelation, AccessShareLock);
841 :
842 282 : return result;
843 : }
844 :
845 : /*
846 : * get_qual_from_partbound
847 : * Given a parser node for partition bound, return the list of executable
848 : * expressions as partition constraint
849 : */
850 : List *
851 174 : get_qual_from_partbound(Relation rel, Relation parent,
852 : PartitionBoundSpec *spec)
853 : {
854 174 : PartitionKey key = RelationGetPartitionKey(parent);
855 174 : List *my_qual = NIL;
856 :
857 174 : Assert(key != NULL);
858 :
859 174 : switch (key->strategy)
860 : {
861 : case PARTITION_STRATEGY_LIST:
862 82 : Assert(spec->strategy == PARTITION_STRATEGY_LIST);
863 82 : my_qual = get_qual_for_list(key, spec);
864 82 : break;
865 :
866 : case PARTITION_STRATEGY_RANGE:
867 92 : Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
868 92 : my_qual = get_qual_for_range(key, spec);
869 92 : break;
870 :
871 : default:
872 0 : elog(ERROR, "unexpected partition strategy: %d",
873 : (int) key->strategy);
874 : }
875 :
876 174 : return my_qual;
877 : }
878 :
879 : /*
880 : * map_partition_varattnos - maps varattno of any Vars in expr from the
881 : * parent attno to partition attno.
882 : *
883 : * We must allow for cases where physical attnos of a partition can be
884 : * different from the parent's.
885 : *
886 : * If found_whole_row is not NULL, *found_whole_row returns whether a
887 : * whole-row variable was found in the input expression.
888 : *
889 : * Note: this will work on any node tree, so really the argument and result
890 : * should be declared "Node *". But a substantial majority of the callers
891 : * are working on Lists, so it's less messy to do the casts internally.
892 : */
893 : List *
894 209 : map_partition_varattnos(List *expr, int target_varno,
895 : Relation partrel, Relation parent,
896 : bool *found_whole_row)
897 : {
898 : AttrNumber *part_attnos;
899 : bool my_found_whole_row;
900 :
901 209 : if (expr == NIL)
902 0 : return NIL;
903 :
904 209 : part_attnos = convert_tuples_by_name_map(RelationGetDescr(partrel),
905 : RelationGetDescr(parent),
906 : gettext_noop("could not convert row type"));
907 418 : expr = (List *) map_variable_attnos((Node *) expr,
908 : target_varno, 0,
909 : part_attnos,
910 209 : RelationGetDescr(parent)->natts,
911 209 : RelationGetForm(partrel)->reltype,
912 : &my_found_whole_row);
913 209 : if (found_whole_row)
914 180 : *found_whole_row = my_found_whole_row;
915 :
916 209 : return expr;
917 : }
918 :
919 : /*
920 : * RelationGetPartitionQual
921 : *
922 : * Returns a list of partition quals
923 : */
924 : List *
925 6386 : RelationGetPartitionQual(Relation rel)
926 : {
927 : /* Quick exit */
928 6386 : if (!rel->rd_rel->relispartition)
929 5778 : return NIL;
930 :
931 608 : return generate_partition_qual(rel);
932 : }
933 :
934 : /*
935 : * get_partition_qual_relid
936 : *
937 : * Returns an expression tree describing the passed-in relation's partition
938 : * constraint.
939 : */
940 : Expr *
941 19 : get_partition_qual_relid(Oid relid)
942 : {
943 19 : Relation rel = heap_open(relid, AccessShareLock);
944 19 : Expr *result = NULL;
945 : List *and_args;
946 :
947 : /* Do the work only if this relation is a partition. */
948 19 : if (rel->rd_rel->relispartition)
949 : {
950 19 : and_args = generate_partition_qual(rel);
951 19 : if (list_length(and_args) > 1)
952 19 : result = makeBoolExpr(AND_EXPR, and_args, -1);
953 : else
954 0 : result = linitial(and_args);
955 : }
956 :
957 : /* Keep the lock. */
958 19 : heap_close(rel, NoLock);
959 :
960 19 : return result;
961 : }
962 :
963 : /*
964 : * Append OIDs of rel's partitions to the list 'partoids' and for each OID,
965 : * append pointer rel to the list 'parents'.
966 : */
967 : #define APPEND_REL_PARTITION_OIDS(rel, partoids, parents) \
968 : do\
969 : {\
970 : int i;\
971 : for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\
972 : {\
973 : (partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\
974 : (parents) = lappend((parents), (rel));\
975 : }\
976 : } while(0)
977 :
978 : /*
979 : * RelationGetPartitionDispatchInfo
980 : * Returns information necessary to route tuples down a partition tree
981 : *
982 : * The number of elements in the returned array (that is, the number of
983 : * PartitionDispatch objects for the partitioned tables in the partition tree)
984 : * is returned in *num_parted and a list of the OIDs of all the leaf
985 : * partitions of rel is returned in *leaf_part_oids.
986 : *
987 : * All the relations in the partition tree (including 'rel') must have been
988 : * locked (using at least the AccessShareLock) by the caller.
989 : */
990 : PartitionDispatch *
991 69 : RelationGetPartitionDispatchInfo(Relation rel,
992 : int *num_parted, List **leaf_part_oids)
993 : {
994 : PartitionDispatchData **pd;
995 69 : List *all_parts = NIL,
996 69 : *all_parents = NIL,
997 : *parted_rels,
998 : *parted_rel_parents;
999 : ListCell *lc1,
1000 : *lc2;
1001 : int i,
1002 : k,
1003 : offset;
1004 :
1005 : /*
1006 : * We rely on the relcache to traverse the partition tree to build both
1007 : * the leaf partition OIDs list and the array of PartitionDispatch objects
1008 : * for the partitioned tables in the tree. That means every partitioned
1009 : * table in the tree must be locked, which is fine since we require the
1010 : * caller to lock all the partitions anyway.
1011 : *
1012 : * For every partitioned table in the tree, starting with the root
1013 : * partitioned table, add its relcache entry to parted_rels, while also
1014 : * queuing its partitions (in the order in which they appear in the
1015 : * partition descriptor) to be looked at later in the same loop. This is
1016 : * a bit tricky but works because the foreach() macro doesn't fetch the
1017 : * next list element until the bottom of the loop.
1018 : */
1019 69 : *num_parted = 1;
1020 69 : parted_rels = list_make1(rel);
1021 : /* Root partitioned table has no parent, so NULL for parent */
1022 69 : parted_rel_parents = list_make1(NULL);
1023 69 : APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents);
1024 332 : forboth(lc1, all_parts, lc2, all_parents)
1025 : {
1026 263 : Oid partrelid = lfirst_oid(lc1);
1027 263 : Relation parent = lfirst(lc2);
1028 :
1029 263 : if (get_rel_relkind(partrelid) == RELKIND_PARTITIONED_TABLE)
1030 : {
1031 : /*
1032 : * Already locked by the caller. Note that it is the
1033 : * responsibility of the caller to close the below relcache entry,
1034 : * once done using the information being collected here (for
1035 : * example, in ExecEndModifyTable).
1036 : */
1037 31 : Relation partrel = heap_open(partrelid, NoLock);
1038 :
1039 31 : (*num_parted)++;
1040 31 : parted_rels = lappend(parted_rels, partrel);
1041 31 : parted_rel_parents = lappend(parted_rel_parents, parent);
1042 31 : APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents);
1043 : }
1044 : }
1045 :
1046 : /*
1047 : * We want to create two arrays - one for leaf partitions and another for
1048 : * partitioned tables (including the root table and internal partitions).
1049 : * While we only create the latter here, leaf partition array of suitable
1050 : * objects (such as, ResultRelInfo) is created by the caller using the
1051 : * list of OIDs we return. Indexes into these arrays get assigned in a
1052 : * breadth-first manner, whereby partitions of any given level are placed
1053 : * consecutively in the respective arrays.
1054 : */
1055 69 : pd = (PartitionDispatchData **) palloc(*num_parted *
1056 : sizeof(PartitionDispatchData *));
1057 69 : *leaf_part_oids = NIL;
1058 69 : i = k = offset = 0;
1059 169 : forboth(lc1, parted_rels, lc2, parted_rel_parents)
1060 : {
1061 100 : Relation partrel = lfirst(lc1);
1062 100 : Relation parent = lfirst(lc2);
1063 100 : PartitionKey partkey = RelationGetPartitionKey(partrel);
1064 100 : TupleDesc tupdesc = RelationGetDescr(partrel);
1065 100 : PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
1066 : int j,
1067 : m;
1068 :
1069 100 : pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
1070 100 : pd[i]->reldesc = partrel;
1071 100 : pd[i]->key = partkey;
1072 100 : pd[i]->keystate = NIL;
1073 100 : pd[i]->partdesc = partdesc;
1074 100 : if (parent != NULL)
1075 : {
1076 : /*
1077 : * For every partitioned table other than root, we must store a
1078 : * tuple table slot initialized with its tuple descriptor and a
1079 : * tuple conversion map to convert a tuple from its parent's
1080 : * rowtype to its own. That is to make sure that we are looking at
1081 : * the correct row using the correct tuple descriptor when
1082 : * computing its partition key for tuple routing.
1083 : */
1084 31 : pd[i]->tupslot = MakeSingleTupleTableSlot(tupdesc);
1085 31 : pd[i]->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
1086 : tupdesc,
1087 : gettext_noop("could not convert row type"));
1088 : }
1089 : else
1090 : {
1091 : /* Not required for the root partitioned table */
1092 69 : pd[i]->tupslot = NULL;
1093 69 : pd[i]->tupmap = NULL;
1094 : }
1095 100 : pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
1096 :
1097 : /*
1098 : * Indexes corresponding to the internal partitions are multiplied by
1099 : * -1 to distinguish them from those of leaf partitions. Encountering
1100 : * an index >= 0 means we found a leaf partition, which is immediately
1101 : * returned as the partition we are looking for. A negative index
1102 : * means we found a partitioned table, whose PartitionDispatch object
1103 : * is located at the above index multiplied back by -1. Using the
1104 : * PartitionDispatch object, search is continued further down the
1105 : * partition tree.
1106 : */
1107 100 : m = 0;
1108 363 : for (j = 0; j < partdesc->nparts; j++)
1109 : {
1110 263 : Oid partrelid = partdesc->oids[j];
1111 :
1112 263 : if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
1113 : {
1114 232 : *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
1115 232 : pd[i]->indexes[j] = k++;
1116 : }
1117 : else
1118 : {
1119 : /*
1120 : * offset denotes the number of partitioned tables of upper
1121 : * levels including those of the current level. Any partition
1122 : * of this table must belong to the next level and hence will
1123 : * be placed after the last partitioned table of this level.
1124 : */
1125 31 : pd[i]->indexes[j] = -(1 + offset + m);
1126 31 : m++;
1127 : }
1128 : }
1129 100 : i++;
1130 :
1131 : /*
1132 : * This counts the number of partitioned tables at upper levels
1133 : * including those of the current level.
1134 : */
1135 100 : offset += m;
1136 : }
1137 :
1138 69 : return pd;
1139 : }
1140 :
1141 : /* Module-local functions */
1142 :
1143 : /*
1144 : * get_partition_operator
1145 : *
1146 : * Return oid of the operator of given strategy for a given partition key
1147 : * column.
1148 : */
1149 : static Oid
1150 467 : get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
1151 : bool *need_relabel)
1152 : {
1153 : Oid operoid;
1154 :
1155 : /*
1156 : * First check if there exists an operator of the given strategy, with
1157 : * this column's type as both its lefttype and righttype, in the
1158 : * partitioning operator family specified for the column.
1159 : */
1160 1401 : operoid = get_opfamily_member(key->partopfamily[col],
1161 467 : key->parttypid[col],
1162 467 : key->parttypid[col],
1163 : strategy);
1164 :
1165 : /*
1166 : * If one doesn't exist, we must resort to using an operator in the same
1167 : * operator family but with the operator class declared input type. It is
1168 : * OK to do so, because the column's type is known to be binary-coercible
1169 : * with the operator class input type (otherwise, the operator class in
1170 : * question would not have been accepted as the partitioning operator
1171 : * class). We must however inform the caller to wrap the non-Const
1172 : * expression with a RelabelType node to denote the implicit coercion. It
1173 : * ensures that the resulting expression structurally matches similarly
1174 : * processed expressions within the optimizer.
1175 : */
1176 467 : if (!OidIsValid(operoid))
1177 : {
1178 9 : operoid = get_opfamily_member(key->partopfamily[col],
1179 3 : key->partopcintype[col],
1180 3 : key->partopcintype[col],
1181 : strategy);
1182 3 : if (!OidIsValid(operoid))
1183 0 : elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
1184 : strategy, key->partopcintype[col], key->partopcintype[col],
1185 : key->partopfamily[col]);
1186 3 : *need_relabel = true;
1187 : }
1188 : else
1189 464 : *need_relabel = false;
1190 :
1191 467 : return operoid;
1192 : }
1193 :
1194 : /*
1195 : * make_partition_op_expr
1196 : * Returns an Expr for the given partition key column with arg1 and
1197 : * arg2 as its leftop and rightop, respectively
1198 : */
1199 : static Expr *
1200 467 : make_partition_op_expr(PartitionKey key, int keynum,
1201 : uint16 strategy, Expr *arg1, Expr *arg2)
1202 : {
1203 : Oid operoid;
1204 467 : bool need_relabel = false;
1205 467 : Expr *result = NULL;
1206 :
1207 : /* Get the correct btree operator for this partitioning column */
1208 467 : operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
1209 :
1210 : /*
1211 : * Chosen operator may be such that the non-Const operand needs to be
1212 : * coerced, so apply the same; see the comment in
1213 : * get_partition_operator().
1214 : */
1215 821 : if (!IsA(arg1, Const) &&
1216 705 : (need_relabel ||
1217 351 : key->partcollation[keynum] != key->parttypcoll[keynum]))
1218 6 : arg1 = (Expr *) makeRelabelType(arg1,
1219 3 : key->partopcintype[keynum],
1220 : -1,
1221 3 : key->partcollation[keynum],
1222 : COERCE_EXPLICIT_CAST);
1223 :
1224 : /* Generate the actual expression */
1225 467 : switch (key->strategy)
1226 : {
1227 : case PARTITION_STRATEGY_LIST:
1228 : {
1229 : ScalarArrayOpExpr *saopexpr;
1230 :
1231 : /* Build leftop = ANY (rightop) */
1232 79 : saopexpr = makeNode(ScalarArrayOpExpr);
1233 79 : saopexpr->opno = operoid;
1234 79 : saopexpr->opfuncid = get_opcode(operoid);
1235 79 : saopexpr->useOr = true;
1236 79 : saopexpr->inputcollid = key->partcollation[keynum];
1237 79 : saopexpr->args = list_make2(arg1, arg2);
1238 79 : saopexpr->location = -1;
1239 :
1240 79 : result = (Expr *) saopexpr;
1241 79 : break;
1242 : }
1243 :
1244 : case PARTITION_STRATEGY_RANGE:
1245 388 : result = make_opclause(operoid,
1246 : BOOLOID,
1247 : false,
1248 : arg1, arg2,
1249 : InvalidOid,
1250 388 : key->partcollation[keynum]);
1251 388 : break;
1252 :
1253 : default:
1254 0 : elog(ERROR, "invalid partitioning strategy");
1255 : break;
1256 : }
1257 :
1258 467 : return result;
1259 : }
1260 :
1261 : /*
1262 : * get_qual_for_list
1263 : *
1264 : * Returns an implicit-AND list of expressions to use as a list partition's
1265 : * constraint, given the partition key and bound structures.
1266 : */
1267 : static List *
1268 82 : get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
1269 : {
1270 : List *result;
1271 : Expr *keyCol;
1272 : ArrayExpr *arr;
1273 : Expr *opexpr;
1274 : NullTest *nulltest;
1275 : ListCell *cell;
1276 82 : List *arrelems = NIL;
1277 82 : bool list_has_null = false;
1278 :
1279 : /*
1280 : * Only single-column list partitioning is supported, so we are worried
1281 : * only about the partition key with index 0.
1282 : */
1283 82 : Assert(key->partnatts == 1);
1284 :
1285 : /* Construct Var or expression representing the partition column */
1286 82 : if (key->partattrs[0] != 0)
1287 292 : keyCol = (Expr *) makeVar(1,
1288 73 : key->partattrs[0],
1289 73 : key->parttypid[0],
1290 73 : key->parttypmod[0],
1291 73 : key->parttypcoll[0],
1292 : 0);
1293 : else
1294 9 : keyCol = (Expr *) copyObject(linitial(key->partexprs));
1295 :
1296 : /* Create list of Consts for the allowed values, excluding any nulls */
1297 177 : foreach(cell, spec->listdatums)
1298 : {
1299 95 : Const *val = castNode(Const, lfirst(cell));
1300 :
1301 95 : if (val->constisnull)
1302 6 : list_has_null = true;
1303 : else
1304 89 : arrelems = lappend(arrelems, copyObject(val));
1305 : }
1306 :
1307 82 : if (arrelems)
1308 : {
1309 : /* Construct an ArrayExpr for the non-null partition values */
1310 79 : arr = makeNode(ArrayExpr);
1311 158 : arr->array_typeid = !type_is_array(key->parttypid[0])
1312 79 : ? get_array_type(key->parttypid[0])
1313 158 : : key->parttypid[0];
1314 79 : arr->array_collid = key->parttypcoll[0];
1315 79 : arr->element_typeid = key->parttypid[0];
1316 79 : arr->elements = arrelems;
1317 79 : arr->multidims = false;
1318 79 : arr->location = -1;
1319 :
1320 : /* Generate the main expression, i.e., keyCol = ANY (arr) */
1321 79 : opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
1322 : keyCol, (Expr *) arr);
1323 : }
1324 : else
1325 : {
1326 : /* If there are no partition values, we don't need an = ANY expr */
1327 3 : opexpr = NULL;
1328 : }
1329 :
1330 82 : if (!list_has_null)
1331 : {
1332 : /*
1333 : * Gin up a "col IS NOT NULL" test that will be AND'd with the main
1334 : * expression. This might seem redundant, but the partition routing
1335 : * machinery needs it.
1336 : */
1337 76 : nulltest = makeNode(NullTest);
1338 76 : nulltest->arg = keyCol;
1339 76 : nulltest->nulltesttype = IS_NOT_NULL;
1340 76 : nulltest->argisrow = false;
1341 76 : nulltest->location = -1;
1342 :
1343 76 : result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
1344 : }
1345 : else
1346 : {
1347 : /*
1348 : * Gin up a "col IS NULL" test that will be OR'd with the main
1349 : * expression.
1350 : */
1351 6 : nulltest = makeNode(NullTest);
1352 6 : nulltest->arg = keyCol;
1353 6 : nulltest->nulltesttype = IS_NULL;
1354 6 : nulltest->argisrow = false;
1355 6 : nulltest->location = -1;
1356 :
1357 6 : if (opexpr)
1358 : {
1359 : Expr *or;
1360 :
1361 3 : or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
1362 3 : result = list_make1(or);
1363 : }
1364 : else
1365 3 : result = list_make1(nulltest);
1366 : }
1367 :
1368 82 : return result;
1369 : }
1370 :
1371 : /*
1372 : * get_range_key_properties
1373 : * Returns range partition key information for a given column
1374 : *
1375 : * This is a subroutine for get_qual_for_range, and its API is pretty
1376 : * specialized to that caller.
1377 : *
1378 : * Constructs an Expr for the key column (returned in *keyCol) and Consts
1379 : * for the lower and upper range limits (returned in *lower_val and
1380 : * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
1381 : * a Const. All of these structures are freshly palloc'd.
1382 : *
1383 : * *partexprs_item points to the cell containing the next expression in
1384 : * the key->partexprs list, or NULL. It may be advanced upon return.
1385 : */
1386 : static void
1387 267 : get_range_key_properties(PartitionKey key, int keynum,
1388 : PartitionRangeDatum *ldatum,
1389 : PartitionRangeDatum *udatum,
1390 : ListCell **partexprs_item,
1391 : Expr **keyCol,
1392 : Const **lower_val, Const **upper_val)
1393 : {
1394 : /* Get partition key expression for this column */
1395 267 : if (key->partattrs[keynum] != 0)
1396 : {
1397 824 : *keyCol = (Expr *) makeVar(1,
1398 206 : key->partattrs[keynum],
1399 206 : key->parttypid[keynum],
1400 206 : key->parttypmod[keynum],
1401 206 : key->parttypcoll[keynum],
1402 : 0);
1403 : }
1404 : else
1405 : {
1406 61 : if (*partexprs_item == NULL)
1407 0 : elog(ERROR, "wrong number of partition key expressions");
1408 61 : *keyCol = copyObject(lfirst(*partexprs_item));
1409 61 : *partexprs_item = lnext(*partexprs_item);
1410 : }
1411 :
1412 : /* Get appropriate Const nodes for the bounds */
1413 267 : if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
1414 247 : *lower_val = castNode(Const, copyObject(ldatum->value));
1415 : else
1416 20 : *lower_val = NULL;
1417 :
1418 267 : if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
1419 247 : *upper_val = castNode(Const, copyObject(udatum->value));
1420 : else
1421 20 : *upper_val = NULL;
1422 267 : }
1423 :
1424 : /*
1425 : * get_qual_for_range
1426 : *
1427 : * Returns an implicit-AND list of expressions to use as a range partition's
1428 : * constraint, given the partition key and bound structures.
1429 : *
1430 : * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
1431 : * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
1432 : * generate an expression tree of the following form:
1433 : *
1434 : * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
1435 : * AND
1436 : * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
1437 : * AND
1438 : * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
1439 : *
1440 : * It is often the case that a prefix of lower and upper bound tuples contains
1441 : * the same values, for example, (al = au), in which case, we will emit an
1442 : * expression tree of the following form:
1443 : *
1444 : * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
1445 : * AND
1446 : * (a = al)
1447 : * AND
1448 : * (b > bl OR (b = bl AND c >= cl))
1449 : * AND
1450 : * (b < bu) OR (b = bu AND c < cu))
1451 : *
1452 : * If a bound datum is either MINVALUE or MAXVALUE, these expressions are
1453 : * simplified using the fact that any value is greater than MINVALUE and less
1454 : * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
1455 : * true, and we need not emit any expression for it, and the last line becomes
1456 : *
1457 : * (b < bu) OR (b = bu), which is simplified to (b <= bu)
1458 : *
1459 : * In most common cases with only one partition column, say a, the following
1460 : * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
1461 : *
1462 : * If we end up with an empty result list, we return a single-member list
1463 : * containing a constant TRUE, because callers expect a non-empty list.
1464 : */
1465 : static List *
1466 92 : get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
1467 : {
1468 92 : List *result = NIL;
1469 : ListCell *cell1,
1470 : *cell2,
1471 : *partexprs_item,
1472 : *partexprs_item_saved;
1473 : int i,
1474 : j;
1475 : PartitionRangeDatum *ldatum,
1476 : *udatum;
1477 : Expr *keyCol;
1478 : Const *lower_val,
1479 : *upper_val;
1480 : NullTest *nulltest;
1481 : List *lower_or_arms,
1482 : *upper_or_arms;
1483 : int num_or_arms,
1484 : current_or_arm;
1485 : ListCell *lower_or_start_datum,
1486 : *upper_or_start_datum;
1487 : bool need_next_lower_arm,
1488 : need_next_upper_arm;
1489 :
1490 92 : lower_or_start_datum = list_head(spec->lowerdatums);
1491 92 : upper_or_start_datum = list_head(spec->upperdatums);
1492 92 : num_or_arms = key->partnatts;
1493 :
1494 : /*
1495 : * A range-partitioned table does not currently allow partition keys to be
1496 : * null, so emit an IS NOT NULL expression for each key column.
1497 : */
1498 92 : partexprs_item = list_head(key->partexprs);
1499 253 : for (i = 0; i < key->partnatts; i++)
1500 : {
1501 : Expr *keyCol;
1502 :
1503 161 : if (key->partattrs[i] != 0)
1504 : {
1505 516 : keyCol = (Expr *) makeVar(1,
1506 129 : key->partattrs[i],
1507 129 : key->parttypid[i],
1508 129 : key->parttypmod[i],
1509 129 : key->parttypcoll[i],
1510 : 0);
1511 : }
1512 : else
1513 : {
1514 32 : if (partexprs_item == NULL)
1515 0 : elog(ERROR, "wrong number of partition key expressions");
1516 32 : keyCol = copyObject(lfirst(partexprs_item));
1517 32 : partexprs_item = lnext(partexprs_item);
1518 : }
1519 :
1520 161 : nulltest = makeNode(NullTest);
1521 161 : nulltest->arg = keyCol;
1522 161 : nulltest->nulltesttype = IS_NOT_NULL;
1523 161 : nulltest->argisrow = false;
1524 161 : nulltest->location = -1;
1525 161 : result = lappend(result, nulltest);
1526 : }
1527 :
1528 : /*
1529 : * Iterate over the key columns and check if the corresponding lower and
1530 : * upper datums are equal using the btree equality operator for the
1531 : * column's type. If equal, we emit single keyCol = common_value
1532 : * expression. Starting from the first column for which the corresponding
1533 : * lower and upper bound datums are not equal, we generate OR expressions
1534 : * as shown in the function's header comment.
1535 : */
1536 92 : i = 0;
1537 92 : partexprs_item = list_head(key->partexprs);
1538 92 : partexprs_item_saved = partexprs_item; /* placate compiler */
1539 127 : forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
1540 : {
1541 : EState *estate;
1542 : MemoryContext oldcxt;
1543 : Expr *test_expr;
1544 : ExprState *test_exprstate;
1545 : Datum test_result;
1546 : bool isNull;
1547 :
1548 127 : ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
1549 127 : udatum = castNode(PartitionRangeDatum, lfirst(cell2));
1550 :
1551 : /*
1552 : * Since get_range_key_properties() modifies partexprs_item, and we
1553 : * might need to start over from the previous expression in the later
1554 : * part of this function, save away the current value.
1555 : */
1556 127 : partexprs_item_saved = partexprs_item;
1557 :
1558 127 : get_range_key_properties(key, i, ldatum, udatum,
1559 : &partexprs_item,
1560 : &keyCol,
1561 : &lower_val, &upper_val);
1562 :
1563 : /*
1564 : * If either value is NULL, the corresponding partition bound is
1565 : * either MINVALUE or MAXVALUE, and we treat them as unequal, because
1566 : * even if they're the same, there is no common value to equate the
1567 : * key column with.
1568 : */
1569 127 : if (!lower_val || !upper_val)
1570 : break;
1571 :
1572 : /* Create the test expression */
1573 113 : estate = CreateExecutorState();
1574 113 : oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
1575 113 : test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
1576 : (Expr *) lower_val,
1577 : (Expr *) upper_val);
1578 113 : fix_opfuncids((Node *) test_expr);
1579 113 : test_exprstate = ExecInitExpr(test_expr, NULL);
1580 113 : test_result = ExecEvalExprSwitchContext(test_exprstate,
1581 113 : GetPerTupleExprContext(estate),
1582 : &isNull);
1583 113 : MemoryContextSwitchTo(oldcxt);
1584 113 : FreeExecutorState(estate);
1585 :
1586 : /* If not equal, go generate the OR expressions */
1587 113 : if (!DatumGetBool(test_result))
1588 78 : break;
1589 :
1590 : /*
1591 : * The bounds for the last key column can't be equal, because such a
1592 : * range partition would never be allowed to be defined (it would have
1593 : * an empty range otherwise).
1594 : */
1595 35 : if (i == key->partnatts - 1)
1596 0 : elog(ERROR, "invalid range bound specification");
1597 :
1598 : /* Equal, so generate keyCol = lower_val expression */
1599 70 : result = lappend(result,
1600 70 : make_partition_op_expr(key, i, BTEqualStrategyNumber,
1601 : keyCol, (Expr *) lower_val));
1602 :
1603 35 : i++;
1604 : }
1605 :
1606 : /* First pair of lower_val and upper_val that are not equal. */
1607 92 : lower_or_start_datum = cell1;
1608 92 : upper_or_start_datum = cell2;
1609 :
1610 : /* OR will have as many arms as there are key columns left. */
1611 92 : num_or_arms = key->partnatts - i;
1612 92 : current_or_arm = 0;
1613 92 : lower_or_arms = upper_or_arms = NIL;
1614 92 : need_next_lower_arm = need_next_upper_arm = true;
1615 204 : while (current_or_arm < num_or_arms)
1616 : {
1617 112 : List *lower_or_arm_args = NIL,
1618 112 : *upper_or_arm_args = NIL;
1619 :
1620 : /* Restart scan of columns from the i'th one */
1621 112 : j = i;
1622 112 : partexprs_item = partexprs_item_saved;
1623 :
1624 140 : for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
1625 : {
1626 140 : PartitionRangeDatum *ldatum_next = NULL,
1627 140 : *udatum_next = NULL;
1628 :
1629 140 : ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
1630 140 : if (lnext(cell1))
1631 59 : ldatum_next = castNode(PartitionRangeDatum,
1632 : lfirst(lnext(cell1)));
1633 140 : udatum = castNode(PartitionRangeDatum, lfirst(cell2));
1634 140 : if (lnext(cell2))
1635 59 : udatum_next = castNode(PartitionRangeDatum,
1636 : lfirst(lnext(cell2)));
1637 140 : get_range_key_properties(key, j, ldatum, udatum,
1638 : &partexprs_item,
1639 : &keyCol,
1640 : &lower_val, &upper_val);
1641 :
1642 140 : if (need_next_lower_arm && lower_val)
1643 : {
1644 : uint16 strategy;
1645 :
1646 : /*
1647 : * For the non-last columns of this arm, use the EQ operator.
1648 : * For the last column of this arm, use GT, unless this is the
1649 : * last column of the whole bound check, or the next bound
1650 : * datum is MINVALUE, in which case use GE.
1651 : */
1652 122 : if (j - i < current_or_arm)
1653 22 : strategy = BTEqualStrategyNumber;
1654 100 : else if (j == key->partnatts - 1 ||
1655 24 : (ldatum_next &&
1656 24 : ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
1657 83 : strategy = BTGreaterEqualStrategyNumber;
1658 : else
1659 17 : strategy = BTGreaterStrategyNumber;
1660 :
1661 244 : lower_or_arm_args = lappend(lower_or_arm_args,
1662 244 : make_partition_op_expr(key, j,
1663 : strategy,
1664 : keyCol,
1665 : (Expr *) lower_val));
1666 : }
1667 :
1668 140 : if (need_next_upper_arm && upper_val)
1669 : {
1670 : uint16 strategy;
1671 :
1672 : /*
1673 : * For the non-last columns of this arm, use the EQ operator.
1674 : * For the last column of this arm, use LT, unless the next
1675 : * bound datum is MAXVALUE, in which case use LE.
1676 : */
1677 118 : if (j - i < current_or_arm)
1678 19 : strategy = BTEqualStrategyNumber;
1679 122 : else if (udatum_next &&
1680 23 : udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
1681 5 : strategy = BTLessEqualStrategyNumber;
1682 : else
1683 94 : strategy = BTLessStrategyNumber;
1684 :
1685 236 : upper_or_arm_args = lappend(upper_or_arm_args,
1686 236 : make_partition_op_expr(key, j,
1687 : strategy,
1688 : keyCol,
1689 : (Expr *) upper_val));
1690 : }
1691 :
1692 : /*
1693 : * Did we generate enough of OR's arguments? First arm considers
1694 : * the first of the remaining columns, second arm considers first
1695 : * two of the remaining columns, and so on.
1696 : */
1697 140 : ++j;
1698 140 : if (j - i > current_or_arm)
1699 : {
1700 : /*
1701 : * We must not emit any more arms if the new column that will
1702 : * be considered is unbounded, or this one was.
1703 : */
1704 137 : if (!lower_val || !ldatum_next ||
1705 25 : ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
1706 95 : need_next_lower_arm = false;
1707 137 : if (!upper_val || !udatum_next ||
1708 25 : udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
1709 96 : need_next_upper_arm = false;
1710 112 : break;
1711 : }
1712 : }
1713 :
1714 112 : if (lower_or_arm_args != NIL)
1715 184 : lower_or_arms = lappend(lower_or_arms,
1716 100 : list_length(lower_or_arm_args) > 1
1717 : ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
1718 84 : : linitial(lower_or_arm_args));
1719 :
1720 112 : if (upper_or_arm_args != NIL)
1721 184 : upper_or_arms = lappend(upper_or_arms,
1722 99 : list_length(upper_or_arm_args) > 1
1723 : ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
1724 85 : : linitial(upper_or_arm_args));
1725 :
1726 : /* If no work to do in the next iteration, break away. */
1727 112 : if (!need_next_lower_arm && !need_next_upper_arm)
1728 92 : break;
1729 :
1730 20 : ++current_or_arm;
1731 : }
1732 :
1733 : /*
1734 : * Generate the OR expressions for each of lower and upper bounds (if
1735 : * required), and append to the list of implicitly ANDed list of
1736 : * expressions.
1737 : */
1738 92 : if (lower_or_arms != NIL)
1739 158 : result = lappend(result,
1740 84 : list_length(lower_or_arms) > 1
1741 : ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
1742 74 : : linitial(lower_or_arms));
1743 92 : if (upper_or_arms != NIL)
1744 161 : result = lappend(result,
1745 85 : list_length(upper_or_arms) > 1
1746 : ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
1747 76 : : linitial(upper_or_arms));
1748 :
1749 : /* As noted above, caller expects the list to be non-empty. */
1750 92 : if (result == NIL)
1751 0 : result = list_make1(makeBoolConst(true, false));
1752 :
1753 92 : return result;
1754 : }
1755 :
1756 : /*
1757 : * generate_partition_qual
1758 : *
1759 : * Generate partition predicate from rel's partition bound expression
1760 : *
1761 : * Result expression tree is stored CacheMemoryContext to ensure it survives
1762 : * as long as the relcache entry. But we should be running in a less long-lived
1763 : * working context. To avoid leaking cache memory if this routine fails partway
1764 : * through, we build in working memory and then copy the completed structure
1765 : * into cache memory.
1766 : */
1767 : static List *
1768 663 : generate_partition_qual(Relation rel)
1769 : {
1770 : HeapTuple tuple;
1771 : MemoryContext oldcxt;
1772 : Datum boundDatum;
1773 : bool isnull;
1774 : PartitionBoundSpec *bound;
1775 663 : List *my_qual = NIL,
1776 663 : *result = NIL;
1777 : Relation parent;
1778 : bool found_whole_row;
1779 :
1780 : /* Guard against stack overflow due to overly deep partition tree */
1781 663 : check_stack_depth();
1782 :
1783 : /* Quick copy */
1784 663 : if (rel->rd_partcheck != NIL)
1785 522 : return copyObject(rel->rd_partcheck);
1786 :
1787 : /* Grab at least an AccessShareLock on the parent table */
1788 141 : parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
1789 : AccessShareLock);
1790 :
1791 : /* Get pg_class.relpartbound */
1792 141 : tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
1793 141 : if (!HeapTupleIsValid(tuple))
1794 0 : elog(ERROR, "cache lookup failed for relation %u",
1795 : RelationGetRelid(rel));
1796 :
1797 141 : boundDatum = SysCacheGetAttr(RELOID, tuple,
1798 : Anum_pg_class_relpartbound,
1799 : &isnull);
1800 141 : if (isnull) /* should not happen */
1801 0 : elog(ERROR, "relation \"%s\" has relpartbound = null",
1802 : RelationGetRelationName(rel));
1803 141 : bound = castNode(PartitionBoundSpec,
1804 : stringToNode(TextDatumGetCString(boundDatum)));
1805 141 : ReleaseSysCache(tuple);
1806 :
1807 141 : my_qual = get_qual_from_partbound(rel, parent, bound);
1808 :
1809 : /* Add the parent's quals to the list (if any) */
1810 141 : if (parent->rd_rel->relispartition)
1811 36 : result = list_concat(generate_partition_qual(parent), my_qual);
1812 : else
1813 105 : result = my_qual;
1814 :
1815 : /*
1816 : * Change Vars to have partition's attnos instead of the parent's. We do
1817 : * this after we concatenate the parent's quals, because we want every Var
1818 : * in it to bear this relation's attnos. It's safe to assume varno = 1
1819 : * here.
1820 : */
1821 141 : result = map_partition_varattnos(result, 1, rel, parent,
1822 : &found_whole_row);
1823 : /* There can never be a whole-row reference here */
1824 141 : if (found_whole_row)
1825 0 : elog(ERROR, "unexpected whole-row reference found in partition key");
1826 :
1827 : /* Save a copy in the relcache */
1828 141 : oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
1829 141 : rel->rd_partcheck = copyObject(result);
1830 141 : MemoryContextSwitchTo(oldcxt);
1831 :
1832 : /* Keep the parent locked until commit */
1833 141 : heap_close(parent, NoLock);
1834 :
1835 141 : return result;
1836 : }
1837 :
1838 : /* ----------------
1839 : * FormPartitionKeyDatum
1840 : * Construct values[] and isnull[] arrays for the partition key
1841 : * of a tuple.
1842 : *
1843 : * pd Partition dispatch object of the partitioned table
1844 : * slot Heap tuple from which to extract partition key
1845 : * estate executor state for evaluating any partition key
1846 : * expressions (must be non-NULL)
1847 : * values Array of partition key Datums (output area)
1848 : * isnull Array of is-null indicators (output area)
1849 : *
1850 : * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1851 : * the heap tuple passed in.
1852 : * ----------------
1853 : */
1854 : void
1855 261 : FormPartitionKeyDatum(PartitionDispatch pd,
1856 : TupleTableSlot *slot,
1857 : EState *estate,
1858 : Datum *values,
1859 : bool *isnull)
1860 : {
1861 : ListCell *partexpr_item;
1862 : int i;
1863 :
1864 261 : if (pd->key->partexprs != NIL && pd->keystate == NIL)
1865 : {
1866 : /* Check caller has set up context correctly */
1867 30 : Assert(estate != NULL &&
1868 : GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1869 :
1870 : /* First time through, set up expression evaluation state */
1871 30 : pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1872 : }
1873 :
1874 261 : partexpr_item = list_head(pd->keystate);
1875 605 : for (i = 0; i < pd->key->partnatts; i++)
1876 : {
1877 344 : AttrNumber keycol = pd->key->partattrs[i];
1878 : Datum datum;
1879 : bool isNull;
1880 :
1881 344 : if (keycol != 0)
1882 : {
1883 : /* Plain column; get the value directly from the heap tuple */
1884 264 : datum = slot_getattr(slot, keycol, &isNull);
1885 : }
1886 : else
1887 : {
1888 : /* Expression; need to evaluate it */
1889 80 : if (partexpr_item == NULL)
1890 0 : elog(ERROR, "wrong number of partition key expressions");
1891 80 : datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1892 80 : GetPerTupleExprContext(estate),
1893 : &isNull);
1894 80 : partexpr_item = lnext(partexpr_item);
1895 : }
1896 344 : values[i] = datum;
1897 344 : isnull[i] = isNull;
1898 : }
1899 :
1900 261 : if (partexpr_item != NULL)
1901 0 : elog(ERROR, "wrong number of partition key expressions");
1902 261 : }
1903 :
1904 : /*
1905 : * get_partition_for_tuple
1906 : * Finds a leaf partition for tuple contained in *slot
1907 : *
1908 : * Returned value is the sequence number of the leaf partition thus found,
1909 : * or -1 if no leaf partition is found for the tuple. *failed_at is set
1910 : * to the OID of the partitioned table whose partition was not found in
1911 : * the latter case.
1912 : */
1913 : int
1914 177 : get_partition_for_tuple(PartitionDispatch *pd,
1915 : TupleTableSlot *slot,
1916 : EState *estate,
1917 : PartitionDispatchData **failed_at,
1918 : TupleTableSlot **failed_slot)
1919 : {
1920 : PartitionDispatch parent;
1921 : Datum values[PARTITION_MAX_KEYS];
1922 : bool isnull[PARTITION_MAX_KEYS];
1923 : int cur_offset,
1924 : cur_index;
1925 : int i,
1926 : result;
1927 177 : ExprContext *ecxt = GetPerTupleExprContext(estate);
1928 177 : TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
1929 :
1930 : /* start with the root partitioned table */
1931 177 : parent = pd[0];
1932 : while (true)
1933 : {
1934 252 : PartitionKey key = parent->key;
1935 252 : PartitionDesc partdesc = parent->partdesc;
1936 252 : TupleTableSlot *myslot = parent->tupslot;
1937 252 : TupleConversionMap *map = parent->tupmap;
1938 :
1939 252 : if (myslot != NULL && map != NULL)
1940 : {
1941 14 : HeapTuple tuple = ExecFetchSlotTuple(slot);
1942 :
1943 14 : ExecClearTuple(myslot);
1944 14 : tuple = do_convert_tuple(tuple, map);
1945 14 : ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
1946 14 : slot = myslot;
1947 : }
1948 :
1949 : /* Quick exit */
1950 252 : if (partdesc->nparts == 0)
1951 : {
1952 2 : *failed_at = parent;
1953 2 : *failed_slot = slot;
1954 2 : result = -1;
1955 2 : goto error_exit;
1956 : }
1957 :
1958 : /*
1959 : * Extract partition key from tuple. Expression evaluation machinery
1960 : * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
1961 : * point to the correct tuple slot. The slot might have changed from
1962 : * what was used for the parent table if the table of the current
1963 : * partitioning level has different tuple descriptor from the parent.
1964 : * So update ecxt_scantuple accordingly.
1965 : */
1966 250 : ecxt->ecxt_scantuple = slot;
1967 250 : FormPartitionKeyDatum(parent, slot, estate, values, isnull);
1968 :
1969 250 : if (key->strategy == PARTITION_STRATEGY_RANGE)
1970 : {
1971 : /*
1972 : * Since we cannot route tuples with NULL partition keys through a
1973 : * range-partitioned table, simply return that no partition exists
1974 : */
1975 397 : for (i = 0; i < key->partnatts; i++)
1976 : {
1977 238 : if (isnull[i])
1978 : {
1979 1 : *failed_at = parent;
1980 1 : *failed_slot = slot;
1981 1 : result = -1;
1982 1 : goto error_exit;
1983 : }
1984 : }
1985 : }
1986 :
1987 : /*
1988 : * A null partition key is only acceptable if null-accepting list
1989 : * partition exists.
1990 : */
1991 249 : cur_index = -1;
1992 249 : if (isnull[0] && partition_bound_accepts_nulls(partdesc->boundinfo))
1993 3 : cur_index = partdesc->boundinfo->null_index;
1994 246 : else if (!isnull[0])
1995 : {
1996 : /* Else bsearch in partdesc->boundinfo */
1997 245 : bool equal = false;
1998 :
1999 245 : cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
2000 : values, false, &equal);
2001 245 : switch (key->strategy)
2002 : {
2003 : case PARTITION_STRATEGY_LIST:
2004 86 : if (cur_offset >= 0 && equal)
2005 85 : cur_index = partdesc->boundinfo->indexes[cur_offset];
2006 : else
2007 1 : cur_index = -1;
2008 86 : break;
2009 :
2010 : case PARTITION_STRATEGY_RANGE:
2011 :
2012 : /*
2013 : * Offset returned is such that the bound at offset is
2014 : * found to be less or equal with the tuple. So, the bound
2015 : * at offset+1 would be the upper bound.
2016 : */
2017 159 : cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
2018 159 : break;
2019 :
2020 : default:
2021 0 : elog(ERROR, "unexpected partition strategy: %d",
2022 : (int) key->strategy);
2023 : }
2024 : }
2025 :
2026 : /*
2027 : * cur_index < 0 means we failed to find a partition of this parent.
2028 : * cur_index >= 0 means we either found the leaf partition, or the
2029 : * next parent to find a partition of.
2030 : */
2031 249 : if (cur_index < 0)
2032 : {
2033 8 : result = -1;
2034 8 : *failed_at = parent;
2035 8 : *failed_slot = slot;
2036 8 : break;
2037 : }
2038 241 : else if (parent->indexes[cur_index] >= 0)
2039 : {
2040 166 : result = parent->indexes[cur_index];
2041 166 : break;
2042 : }
2043 : else
2044 75 : parent = pd[-parent->indexes[cur_index]];
2045 75 : }
2046 :
2047 : error_exit:
2048 177 : ecxt->ecxt_scantuple = ecxt_scantuple_old;
2049 177 : return result;
2050 : }
2051 :
2052 : /*
2053 : * qsort_partition_list_value_cmp
2054 : *
2055 : * Compare two list partition bound datums
2056 : */
2057 : static int32
2058 212 : qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
2059 : {
2060 212 : Datum val1 = (*(const PartitionListValue **) a)->value,
2061 212 : val2 = (*(const PartitionListValue **) b)->value;
2062 212 : PartitionKey key = (PartitionKey) arg;
2063 :
2064 212 : return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
2065 : key->partcollation[0],
2066 : val1, val2));
2067 : }
2068 :
2069 : /*
2070 : * make_one_range_bound
2071 : *
2072 : * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
2073 : * and a flag telling whether the bound is lower or not. Made into a function
2074 : * because there are multiple sites that want to use this facility.
2075 : */
2076 : static PartitionRangeBound *
2077 978 : make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
2078 : {
2079 : PartitionRangeBound *bound;
2080 : ListCell *lc;
2081 : int i;
2082 :
2083 978 : bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
2084 978 : bound->index = index;
2085 978 : bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
2086 978 : bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
2087 : sizeof(PartitionRangeDatumKind));
2088 978 : bound->lower = lower;
2089 :
2090 978 : i = 0;
2091 2804 : foreach(lc, datums)
2092 : {
2093 1826 : PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
2094 :
2095 : /* What's contained in this range datum? */
2096 1826 : bound->kind[i] = datum->kind;
2097 :
2098 1826 : if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
2099 : {
2100 1550 : Const *val = castNode(Const, datum->value);
2101 :
2102 1550 : if (val->constisnull)
2103 0 : elog(ERROR, "invalid range bound datum");
2104 1550 : bound->datums[i] = val->constvalue;
2105 : }
2106 :
2107 1826 : i++;
2108 : }
2109 :
2110 978 : return bound;
2111 : }
2112 :
2113 : /* Used when sorting range bounds across all range partitions */
2114 : static int32
2115 643 : qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
2116 : {
2117 643 : PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
2118 643 : PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
2119 643 : PartitionKey key = (PartitionKey) arg;
2120 :
2121 643 : return partition_rbound_cmp(key, b1->datums, b1->kind, b1->lower, b2);
2122 : }
2123 :
2124 : /*
2125 : * partition_rbound_cmp
2126 : *
2127 : * Return for two range bounds whether the 1st one (specified in datum1,
2128 : * kind1, and lower1) is <, =, or > the bound specified in *b2.
2129 : *
2130 : * Note that if the values of the two range bounds compare equal, then we take
2131 : * into account whether they are upper or lower bounds, and an upper bound is
2132 : * considered to be smaller than a lower bound. This is important to the way
2133 : * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
2134 : * structure, which only stores the upper bound of a common boundary between
2135 : * two contiguous partitions.
2136 : */
2137 : static int32
2138 862 : partition_rbound_cmp(PartitionKey key,
2139 : Datum *datums1, PartitionRangeDatumKind *kind1,
2140 : bool lower1, PartitionRangeBound *b2)
2141 : {
2142 862 : int32 cmpval = 0; /* placate compiler */
2143 : int i;
2144 862 : Datum *datums2 = b2->datums;
2145 862 : PartitionRangeDatumKind *kind2 = b2->kind;
2146 862 : bool lower2 = b2->lower;
2147 :
2148 1430 : for (i = 0; i < key->partnatts; i++)
2149 : {
2150 : /*
2151 : * First, handle cases where the column is unbounded, which should not
2152 : * invoke the comparison procedure, and should not consider any later
2153 : * columns. Note that the PartitionRangeDatumKind enum elements
2154 : * compare the same way as the values they represent.
2155 : */
2156 1273 : if (kind1[i] < kind2[i])
2157 142 : return -1;
2158 1131 : else if (kind1[i] > kind2[i])
2159 1 : return 1;
2160 1130 : else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
2161 :
2162 : /*
2163 : * The column bounds are both MINVALUE or both MAXVALUE. No later
2164 : * columns should be considered, but we still need to compare
2165 : * whether they are upper or lower bounds.
2166 : */
2167 45 : break;
2168 :
2169 1085 : cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
2170 : key->partcollation[i],
2171 : datums1[i],
2172 : datums2[i]));
2173 1085 : if (cmpval != 0)
2174 517 : break;
2175 : }
2176 :
2177 : /*
2178 : * If the comparison is anything other than equal, we're done. If they
2179 : * compare equal though, we still have to consider whether the boundaries
2180 : * are inclusive or exclusive. Exclusive one is considered smaller of the
2181 : * two.
2182 : */
2183 719 : if (cmpval == 0 && lower1 != lower2)
2184 199 : cmpval = lower1 ? 1 : -1;
2185 :
2186 719 : return cmpval;
2187 : }
2188 :
2189 : /*
2190 : * partition_rbound_datum_cmp
2191 : *
2192 : * Return whether range bound (specified in rb_datums, rb_kind, and rb_lower)
2193 : * is <, =, or > partition key of tuple (tuple_datums)
2194 : */
2195 : static int32
2196 361 : partition_rbound_datum_cmp(PartitionKey key,
2197 : Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
2198 : Datum *tuple_datums)
2199 : {
2200 : int i;
2201 361 : int32 cmpval = -1;
2202 :
2203 555 : for (i = 0; i < key->partnatts; i++)
2204 : {
2205 507 : if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
2206 8 : return -1;
2207 499 : else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
2208 6 : return 1;
2209 :
2210 493 : cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
2211 : key->partcollation[i],
2212 : rb_datums[i],
2213 : tuple_datums[i]));
2214 493 : if (cmpval != 0)
2215 299 : break;
2216 : }
2217 :
2218 347 : return cmpval;
2219 : }
2220 :
2221 : /*
2222 : * partition_bound_cmp
2223 : *
2224 : * Return whether the bound at offset in boundinfo is <, =, or > the argument
2225 : * specified in *probe.
2226 : */
2227 : static int32
2228 722 : partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
2229 : int offset, void *probe, bool probe_is_bound)
2230 : {
2231 722 : Datum *bound_datums = boundinfo->datums[offset];
2232 722 : int32 cmpval = -1;
2233 :
2234 722 : switch (key->strategy)
2235 : {
2236 : case PARTITION_STRATEGY_LIST:
2237 226 : cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
2238 : key->partcollation[0],
2239 : bound_datums[0],
2240 : *(Datum *) probe));
2241 226 : break;
2242 :
2243 : case PARTITION_STRATEGY_RANGE:
2244 : {
2245 496 : PartitionRangeDatumKind *kind = boundinfo->kind[offset];
2246 :
2247 496 : if (probe_is_bound)
2248 : {
2249 : /*
2250 : * We need to pass whether the existing bound is a lower
2251 : * bound, so that two equal-valued lower and upper bounds
2252 : * are not regarded equal.
2253 : */
2254 135 : bool lower = boundinfo->indexes[offset] < 0;
2255 :
2256 135 : cmpval = partition_rbound_cmp(key,
2257 : bound_datums, kind, lower,
2258 : (PartitionRangeBound *) probe);
2259 : }
2260 : else
2261 361 : cmpval = partition_rbound_datum_cmp(key,
2262 : bound_datums, kind,
2263 : (Datum *) probe);
2264 496 : break;
2265 : }
2266 :
2267 : default:
2268 0 : elog(ERROR, "unexpected partition strategy: %d",
2269 : (int) key->strategy);
2270 : }
2271 :
2272 722 : return cmpval;
2273 : }
2274 :
2275 : /*
2276 : * Binary search on a collection of partition bounds. Returns greatest
2277 : * bound in array boundinfo->datums which is less than or equal to *probe.
2278 : * If all bounds in the array are greater than *probe, -1 is returned.
2279 : *
2280 : * *probe could either be a partition bound or a Datum array representing
2281 : * the partition key of a tuple being routed; probe_is_bound tells which.
2282 : * We pass that down to the comparison function so that it can interpret the
2283 : * contents of *probe accordingly.
2284 : *
2285 : * *is_equal is set to whether the bound at the returned index is equal with
2286 : * *probe.
2287 : */
2288 : static int
2289 340 : partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
2290 : void *probe, bool probe_is_bound, bool *is_equal)
2291 : {
2292 : int lo,
2293 : hi,
2294 : mid;
2295 :
2296 340 : lo = -1;
2297 340 : hi = boundinfo->ndatums - 1;
2298 1261 : while (lo < hi)
2299 : {
2300 : int32 cmpval;
2301 :
2302 720 : mid = (lo + hi + 1) / 2;
2303 720 : cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
2304 : probe_is_bound);
2305 720 : if (cmpval <= 0)
2306 : {
2307 552 : lo = mid;
2308 552 : *is_equal = (cmpval == 0);
2309 :
2310 552 : if (*is_equal)
2311 139 : break;
2312 : }
2313 : else
2314 168 : hi = mid - 1;
2315 : }
2316 :
2317 340 : return lo;
2318 : }
|