Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsquery_rewrite.c
4 : * Utilities for reconstructing tsquery
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsquery_rewrite.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "catalog/pg_type.h"
18 : #include "executor/spi.h"
19 : #include "miscadmin.h"
20 : #include "tsearch/ts_utils.h"
21 : #include "utils/builtins.h"
22 :
23 :
24 : /*
25 : * If "node" is equal to "ex", return a copy of "subs" instead.
26 : * If "ex" matches a subset of node's children, return a modified version
27 : * of "node" in which those children are replaced with a copy of "subs".
28 : * Otherwise return "node" unmodified.
29 : *
30 : * The QTN_NOCHANGE bit is set in successfully modified nodes, so that
31 : * we won't uselessly recurse into them.
32 : * Also, set *isfind true if we make a replacement.
33 : */
34 : static QTNode *
35 792 : findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
36 : {
37 : /* Can't match unless signature matches and node type matches. */
38 863 : if ((node->sign & ex->sign) != ex->sign ||
39 71 : node->valnode->type != ex->valnode->type)
40 738 : return node;
41 :
42 : /* Ignore nodes marked NOCHANGE, too. */
43 54 : if (node->flags & QTN_NOCHANGE)
44 13 : return node;
45 :
46 41 : if (node->valnode->type == QI_OPR)
47 : {
48 : /* Must be same operator. */
49 18 : if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper)
50 1 : return node;
51 :
52 17 : if (node->nchild == ex->nchild)
53 : {
54 : /*
55 : * Simple case: when same number of children, match if equal.
56 : * (This is reliable when the children were sorted earlier.)
57 : */
58 4 : if (QTNEq(node, ex))
59 : {
60 : /* Match; delete node and return a copy of subs instead. */
61 2 : QTNFree(node);
62 2 : if (subs)
63 : {
64 2 : node = QTNCopy(subs);
65 2 : node->flags |= QTN_NOCHANGE;
66 : }
67 : else
68 0 : node = NULL;
69 2 : *isfind = true;
70 : }
71 : }
72 13 : else if (node->nchild > ex->nchild && ex->nchild > 0)
73 : {
74 : /*
75 : * AND and OR are commutative/associative, so we should check if a
76 : * subset of the children match. For example, if node is A|B|C,
77 : * and ex is B|C, we have a match after we notionally convert node
78 : * to A|(B|C). This does not work for NOT or PHRASE nodes, but we
79 : * can't get here for those node types because they have a fixed
80 : * number of children.
81 : *
82 : * Because we expect that the children are sorted, it suffices to
83 : * make one pass through the two lists to find the matches.
84 : */
85 : bool *matched;
86 : int nmatched;
87 : int i,
88 : j;
89 :
90 : /* Assert that the subset rule is OK */
91 13 : Assert(node->valnode->qoperator.oper == OP_AND ||
92 : node->valnode->qoperator.oper == OP_OR);
93 :
94 : /* matched[] will record which children of node matched */
95 13 : matched = (bool *) palloc0(node->nchild * sizeof(bool));
96 13 : nmatched = 0;
97 13 : i = j = 0;
98 78 : while (i < node->nchild && j < ex->nchild)
99 : {
100 52 : int cmp = QTNodeCompare(node->child[i], ex->child[j]);
101 :
102 52 : if (cmp == 0)
103 : {
104 : /* match! */
105 32 : matched[i] = true;
106 32 : nmatched++;
107 32 : i++, j++;
108 : }
109 20 : else if (cmp < 0)
110 : {
111 : /* node->child[i] has no match, ignore it */
112 20 : i++;
113 : }
114 : else
115 : {
116 : /* ex->child[j] has no match; we can give up immediately */
117 0 : break;
118 : }
119 : }
120 :
121 13 : if (nmatched == ex->nchild)
122 : {
123 : /* collapse out the matched children of node */
124 13 : j = 0;
125 72 : for (i = 0; i < node->nchild; i++)
126 : {
127 59 : if (matched[i])
128 32 : QTNFree(node->child[i]);
129 : else
130 27 : node->child[j++] = node->child[i];
131 : }
132 :
133 : /* and instead insert a copy of subs */
134 13 : if (subs)
135 : {
136 13 : subs = QTNCopy(subs);
137 13 : subs->flags |= QTN_NOCHANGE;
138 13 : node->child[j++] = subs;
139 : }
140 :
141 13 : node->nchild = j;
142 :
143 : /*
144 : * At this point we might have a node with zero or one child,
145 : * which should be simplified. But we leave it to our caller
146 : * (dofindsubquery) to take care of that.
147 : */
148 :
149 : /*
150 : * Re-sort the node to put new child in the right place. This
151 : * is a bit bogus, because it won't matter for findsubquery's
152 : * remaining processing, and it's insufficient to prepare the
153 : * tree for another search (we would need to re-flatten as
154 : * well, and we don't want to do that because we'd lose the
155 : * QTN_NOCHANGE marking on the new child). But it's needed to
156 : * keep the results the same as the regression tests expect.
157 : */
158 13 : QTNSort(node);
159 :
160 13 : *isfind = true;
161 : }
162 :
163 13 : pfree(matched);
164 : }
165 : }
166 : else
167 : {
168 23 : Assert(node->valnode->type == QI_VAL);
169 :
170 23 : if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc)
171 0 : return node;
172 23 : else if (QTNEq(node, ex))
173 : {
174 23 : QTNFree(node);
175 23 : if (subs)
176 : {
177 20 : node = QTNCopy(subs);
178 20 : node->flags |= QTN_NOCHANGE;
179 : }
180 : else
181 : {
182 3 : node = NULL;
183 : }
184 23 : *isfind = true;
185 : }
186 : }
187 :
188 40 : return node;
189 : }
190 :
191 : /*
192 : * Recursive guts of findsubquery(): attempt to replace "ex" with "subs"
193 : * at the root node, and if we failed to do so, recursively match against
194 : * child nodes.
195 : *
196 : * Delete any void subtrees resulting from the replacement.
197 : * In the following example '5' is replaced by empty operand:
198 : *
199 : * AND -> 6
200 : * / \
201 : * 5 OR
202 : * / \
203 : * 6 5
204 : */
205 : static QTNode *
206 792 : dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
207 : {
208 : /* since this function recurses, it could be driven to stack overflow. */
209 792 : check_stack_depth();
210 :
211 : /* also, since it's a bit expensive, let's check for query cancel. */
212 792 : CHECK_FOR_INTERRUPTS();
213 :
214 : /* match at the node itself */
215 792 : root = findeq(root, ex, subs, isfind);
216 :
217 : /* unless we matched here, consider matches at child nodes */
218 1546 : if (root && (root->flags & QTN_NOCHANGE) == 0 &&
219 754 : root->valnode->type == QI_OPR)
220 : {
221 : int i,
222 282 : j = 0;
223 :
224 : /*
225 : * Any subtrees that are replaced by NULL must be dropped from the
226 : * tree.
227 : */
228 934 : for (i = 0; i < root->nchild; i++)
229 : {
230 652 : root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind);
231 652 : if (root->child[j])
232 649 : j++;
233 : }
234 :
235 282 : root->nchild = j;
236 :
237 : /*
238 : * If we have just zero or one remaining child node, simplify out this
239 : * operator node.
240 : */
241 282 : if (root->nchild == 0)
242 : {
243 1 : QTNFree(root);
244 1 : root = NULL;
245 : }
246 281 : else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT)
247 : {
248 2 : QTNode *nroot = root->child[0];
249 :
250 2 : pfree(root);
251 2 : root = nroot;
252 : }
253 : }
254 :
255 792 : return root;
256 : }
257 :
258 : /*
259 : * Substitute "subs" for "ex" throughout the QTNode tree at root.
260 : *
261 : * If isfind isn't NULL, set *isfind to show whether we made any substitution.
262 : *
263 : * Both "root" and "ex" must have been through QTNTernary and QTNSort
264 : * to ensure reliable matching.
265 : */
266 : QTNode *
267 140 : findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
268 : {
269 140 : bool DidFind = false;
270 :
271 140 : root = dofindsubquery(root, ex, subs, &DidFind);
272 :
273 140 : if (isfind)
274 0 : *isfind = DidFind;
275 :
276 140 : return root;
277 : }
278 :
279 : Datum
280 22 : tsquery_rewrite_query(PG_FUNCTION_ARGS)
281 : {
282 22 : TSQuery query = PG_GETARG_TSQUERY_COPY(0);
283 22 : text *in = PG_GETARG_TEXT_PP(1);
284 22 : TSQuery rewrited = query;
285 22 : MemoryContext outercontext = CurrentMemoryContext;
286 : MemoryContext oldcontext;
287 : QTNode *tree;
288 : char *buf;
289 : SPIPlanPtr plan;
290 : Portal portal;
291 : bool isnull;
292 :
293 22 : if (query->size == 0)
294 : {
295 0 : PG_FREE_IF_COPY(in, 1);
296 0 : PG_RETURN_POINTER(rewrited);
297 : }
298 :
299 22 : tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
300 22 : QTNTernary(tree);
301 22 : QTNSort(tree);
302 :
303 22 : buf = text_to_cstring(in);
304 :
305 22 : SPI_connect();
306 :
307 22 : if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
308 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", buf);
309 :
310 22 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
311 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);
312 :
313 22 : SPI_cursor_fetch(portal, true, 100);
314 :
315 44 : if (SPI_tuptable == NULL ||
316 44 : SPI_tuptable->tupdesc->natts != 2 ||
317 44 : SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
318 22 : SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
319 0 : ereport(ERROR,
320 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
321 : errmsg("ts_rewrite query must return two tsquery columns")));
322 :
323 66 : while (SPI_processed > 0 && tree)
324 : {
325 : uint64 i;
326 :
327 154 : for (i = 0; i < SPI_processed && tree; i++)
328 : {
329 132 : Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
330 : Datum sdata;
331 :
332 132 : if (isnull)
333 0 : continue;
334 :
335 132 : sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
336 :
337 132 : if (!isnull)
338 : {
339 132 : TSQuery qtex = DatumGetTSQuery(qdata);
340 132 : TSQuery qtsubs = DatumGetTSQuery(sdata);
341 : QTNode *qex,
342 132 : *qsubs = NULL;
343 :
344 132 : if (qtex->size == 0)
345 : {
346 0 : if (qtex != (TSQuery) DatumGetPointer(qdata))
347 0 : pfree(qtex);
348 0 : if (qtsubs != (TSQuery) DatumGetPointer(sdata))
349 0 : pfree(qtsubs);
350 0 : continue;
351 : }
352 :
353 132 : qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
354 :
355 132 : QTNTernary(qex);
356 132 : QTNSort(qex);
357 :
358 132 : if (qtsubs->size)
359 132 : qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
360 :
361 132 : oldcontext = MemoryContextSwitchTo(outercontext);
362 132 : tree = findsubquery(tree, qex, qsubs, NULL);
363 132 : MemoryContextSwitchTo(oldcontext);
364 :
365 132 : QTNFree(qex);
366 132 : if (qtex != (TSQuery) DatumGetPointer(qdata))
367 0 : pfree(qtex);
368 132 : QTNFree(qsubs);
369 132 : if (qtsubs != (TSQuery) DatumGetPointer(sdata))
370 0 : pfree(qtsubs);
371 :
372 132 : if (tree)
373 : {
374 : /* ready the tree for another pass */
375 132 : QTNClearFlags(tree, QTN_NOCHANGE);
376 132 : QTNTernary(tree);
377 132 : QTNSort(tree);
378 : }
379 : }
380 : }
381 :
382 22 : SPI_freetuptable(SPI_tuptable);
383 22 : SPI_cursor_fetch(portal, true, 100);
384 : }
385 :
386 22 : SPI_freetuptable(SPI_tuptable);
387 22 : SPI_cursor_close(portal);
388 22 : SPI_freeplan(plan);
389 22 : SPI_finish();
390 :
391 22 : if (tree)
392 : {
393 22 : QTNBinary(tree);
394 22 : rewrited = QTN2QT(tree);
395 22 : QTNFree(tree);
396 22 : PG_FREE_IF_COPY(query, 0);
397 : }
398 : else
399 : {
400 0 : SET_VARSIZE(rewrited, HDRSIZETQ);
401 0 : rewrited->size = 0;
402 : }
403 :
404 22 : pfree(buf);
405 22 : PG_FREE_IF_COPY(in, 1);
406 22 : PG_RETURN_POINTER(rewrited);
407 : }
408 :
409 : Datum
410 8 : tsquery_rewrite(PG_FUNCTION_ARGS)
411 : {
412 8 : TSQuery query = PG_GETARG_TSQUERY_COPY(0);
413 8 : TSQuery ex = PG_GETARG_TSQUERY(1);
414 8 : TSQuery subst = PG_GETARG_TSQUERY(2);
415 8 : TSQuery rewrited = query;
416 : QTNode *tree,
417 : *qex,
418 8 : *subs = NULL;
419 :
420 8 : if (query->size == 0 || ex->size == 0)
421 : {
422 0 : PG_FREE_IF_COPY(ex, 1);
423 0 : PG_FREE_IF_COPY(subst, 2);
424 0 : PG_RETURN_POINTER(rewrited);
425 : }
426 :
427 8 : tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
428 8 : QTNTernary(tree);
429 8 : QTNSort(tree);
430 :
431 8 : qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
432 8 : QTNTernary(qex);
433 8 : QTNSort(qex);
434 :
435 8 : if (subst->size)
436 6 : subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
437 :
438 8 : tree = findsubquery(tree, qex, subs, NULL);
439 :
440 8 : QTNFree(qex);
441 8 : QTNFree(subs);
442 :
443 8 : if (!tree)
444 : {
445 1 : SET_VARSIZE(rewrited, HDRSIZETQ);
446 1 : rewrited->size = 0;
447 1 : PG_FREE_IF_COPY(ex, 1);
448 1 : PG_FREE_IF_COPY(subst, 2);
449 1 : PG_RETURN_POINTER(rewrited);
450 : }
451 : else
452 : {
453 7 : QTNBinary(tree);
454 7 : rewrited = QTN2QT(tree);
455 7 : QTNFree(tree);
456 : }
457 :
458 7 : PG_FREE_IF_COPY(query, 0);
459 7 : PG_FREE_IF_COPY(ex, 1);
460 7 : PG_FREE_IF_COPY(subst, 2);
461 7 : PG_RETURN_POINTER(rewrited);
462 : }
|