Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsginidx.c
4 : * GIN support functions for tsvector_ops
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsginidx.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/gin.h"
17 : #include "access/stratnum.h"
18 : #include "miscadmin.h"
19 : #include "tsearch/ts_type.h"
20 : #include "tsearch/ts_utils.h"
21 : #include "utils/builtins.h"
22 :
23 :
24 : Datum
25 301508 : gin_cmp_tslexeme(PG_FUNCTION_ARGS)
26 : {
27 301508 : text *a = PG_GETARG_TEXT_PP(0);
28 301508 : text *b = PG_GETARG_TEXT_PP(1);
29 : int cmp;
30 :
31 1206032 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
32 1206032 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
33 : false);
34 :
35 301508 : PG_FREE_IF_COPY(a, 0);
36 301508 : PG_FREE_IF_COPY(b, 1);
37 301508 : PG_RETURN_INT32(cmp);
38 : }
39 :
40 : Datum
41 74 : gin_cmp_prefix(PG_FUNCTION_ARGS)
42 : {
43 74 : text *a = PG_GETARG_TEXT_PP(0);
44 74 : text *b = PG_GETARG_TEXT_PP(1);
45 :
46 : #ifdef NOT_USED
47 : StrategyNumber strategy = PG_GETARG_UINT16(2);
48 : Pointer extra_data = PG_GETARG_POINTER(3);
49 : #endif
50 : int cmp;
51 :
52 296 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
53 296 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
54 : true);
55 :
56 74 : if (cmp < 0)
57 2 : cmp = 1; /* prevent continue scan */
58 :
59 74 : PG_FREE_IF_COPY(a, 0);
60 74 : PG_FREE_IF_COPY(b, 1);
61 74 : PG_RETURN_INT32(cmp);
62 : }
63 :
64 : Datum
65 516 : gin_extract_tsvector(PG_FUNCTION_ARGS)
66 : {
67 516 : TSVector vector = PG_GETARG_TSVECTOR(0);
68 516 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
69 516 : Datum *entries = NULL;
70 :
71 516 : *nentries = vector->size;
72 516 : if (vector->size > 0)
73 : {
74 : int i;
75 507 : WordEntry *we = ARRPTR(vector);
76 :
77 507 : entries = (Datum *) palloc(sizeof(Datum) * vector->size);
78 :
79 29346 : for (i = 0; i < vector->size; i++)
80 : {
81 : text *txt;
82 :
83 28839 : txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
84 28839 : entries[i] = PointerGetDatum(txt);
85 :
86 28839 : we++;
87 : }
88 : }
89 :
90 516 : PG_FREE_IF_COPY(vector, 0);
91 516 : PG_RETURN_POINTER(entries);
92 : }
93 :
94 : Datum
95 38 : gin_extract_tsquery(PG_FUNCTION_ARGS)
96 : {
97 38 : TSQuery query = PG_GETARG_TSQUERY(0);
98 38 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
99 :
100 : /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
101 38 : bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102 38 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
103 :
104 : /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
105 38 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
106 38 : Datum *entries = NULL;
107 :
108 38 : *nentries = 0;
109 :
110 38 : if (query->size > 0)
111 : {
112 38 : QueryItem *item = GETQUERY(query);
113 : int32 i,
114 : j;
115 : bool *partialmatch;
116 : int *map_item_operand;
117 :
118 : /*
119 : * If the query doesn't have any required positive matches (for
120 : * instance, it's something like '! foo'), we have to do a full index
121 : * scan.
122 : */
123 38 : if (tsquery_requires_match(item))
124 36 : *searchMode = GIN_SEARCH_MODE_DEFAULT;
125 : else
126 2 : *searchMode = GIN_SEARCH_MODE_ALL;
127 :
128 : /* count number of VAL items */
129 38 : j = 0;
130 144 : for (i = 0; i < query->size; i++)
131 : {
132 106 : if (item[i].type == QI_VAL)
133 71 : j++;
134 : }
135 38 : *nentries = j;
136 :
137 38 : entries = (Datum *) palloc(sizeof(Datum) * j);
138 38 : partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
139 :
140 : /*
141 : * Make map to convert item's number to corresponding operand's (the
142 : * same, entry's) number. Entry's number is used in check array in
143 : * consistent method. We use the same map for each entry.
144 : */
145 38 : *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
146 38 : map_item_operand = (int *) palloc0(sizeof(int) * query->size);
147 :
148 : /* Now rescan the VAL items and fill in the arrays */
149 38 : j = 0;
150 144 : for (i = 0; i < query->size; i++)
151 : {
152 106 : if (item[i].type == QI_VAL)
153 : {
154 71 : QueryOperand *val = &item[i].qoperand;
155 : text *txt;
156 :
157 71 : txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
158 71 : val->length);
159 71 : entries[j] = PointerGetDatum(txt);
160 71 : partialmatch[j] = val->prefix;
161 71 : (*extra_data)[j] = (Pointer) map_item_operand;
162 71 : map_item_operand[i] = j;
163 71 : j++;
164 : }
165 : }
166 : }
167 :
168 38 : PG_FREE_IF_COPY(query, 0);
169 :
170 38 : PG_RETURN_POINTER(entries);
171 : }
172 :
173 : typedef struct
174 : {
175 : QueryItem *first_item;
176 : GinTernaryValue *check;
177 : int *map_item_operand;
178 : bool *need_recheck;
179 : } GinChkVal;
180 :
181 : static GinTernaryValue
182 2588 : checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
183 : {
184 : int j;
185 :
186 : /*
187 : * if any val requiring a weight is used or caller needs position
188 : * information then set recheck flag
189 : */
190 2588 : if (val->weight != 0 || data != NULL)
191 0 : *(gcv->need_recheck) = true;
192 :
193 : /* convert item's number to corresponding entry's (operand's) number */
194 2588 : j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
195 :
196 : /* return presence of current entry in indexed value */
197 2588 : return gcv->check[j];
198 : }
199 :
200 : /*
201 : * Wrapper of check condition function for TS_execute.
202 : */
203 : static bool
204 8 : checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
205 : {
206 16 : return checkcondition_gin_internal((GinChkVal *) checkval,
207 : val,
208 8 : data) != GIN_FALSE;
209 : }
210 :
211 : /*
212 : * Evaluate tsquery boolean expression using ternary logic.
213 : */
214 : static GinTernaryValue
215 4751 : TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
216 : {
217 : GinTernaryValue val1,
218 : val2,
219 : result;
220 :
221 : /* since this function recurses, it could be driven to stack overflow */
222 4751 : check_stack_depth();
223 :
224 4751 : if (curitem->type == QI_VAL)
225 2580 : return
226 : checkcondition_gin_internal(gcv,
227 : (QueryOperand *) curitem,
228 : NULL /* don't have position info */ );
229 :
230 2171 : switch (curitem->qoperator.oper)
231 : {
232 : case OP_NOT:
233 : /* In phrase search, always return MAYBE since we lack positions */
234 509 : if (in_phrase)
235 0 : return GIN_MAYBE;
236 509 : result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
237 509 : if (result == GIN_MAYBE)
238 0 : return result;
239 509 : return !result;
240 :
241 : case OP_PHRASE:
242 :
243 : /*
244 : * GIN doesn't contain any information about positions, so treat
245 : * OP_PHRASE as OP_AND with recheck requirement
246 : */
247 3 : *(gcv->need_recheck) = true;
248 : /* Pass down in_phrase == true in case there's a NOT below */
249 3 : in_phrase = true;
250 :
251 : /* FALL THRU */
252 :
253 : case OP_AND:
254 548 : val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
255 : in_phrase);
256 548 : if (val1 == GIN_FALSE)
257 220 : return GIN_FALSE;
258 328 : val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
259 328 : if (val2 == GIN_FALSE)
260 238 : return GIN_FALSE;
261 90 : if (val1 == GIN_TRUE && val2 == GIN_TRUE)
262 87 : return GIN_TRUE;
263 : else
264 3 : return GIN_MAYBE;
265 :
266 : case OP_OR:
267 1114 : val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
268 : in_phrase);
269 1114 : if (val1 == GIN_TRUE)
270 691 : return GIN_TRUE;
271 423 : val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
272 423 : if (val2 == GIN_TRUE)
273 219 : return GIN_TRUE;
274 204 : if (val1 == GIN_FALSE && val2 == GIN_FALSE)
275 197 : return GIN_FALSE;
276 : else
277 7 : return GIN_MAYBE;
278 :
279 : default:
280 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
281 : }
282 :
283 : /* not reachable, but keep compiler quiet */
284 : return false;
285 : }
286 :
287 : Datum
288 4 : gin_tsquery_consistent(PG_FUNCTION_ARGS)
289 : {
290 4 : bool *check = (bool *) PG_GETARG_POINTER(0);
291 :
292 : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
293 4 : TSQuery query = PG_GETARG_TSQUERY(2);
294 :
295 : /* int32 nkeys = PG_GETARG_INT32(3); */
296 4 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
297 4 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
298 4 : bool res = FALSE;
299 :
300 : /* Initially assume query doesn't require recheck */
301 4 : *recheck = false;
302 :
303 4 : if (query->size > 0)
304 : {
305 : GinChkVal gcv;
306 :
307 : /*
308 : * check-parameter array has one entry for each value (operand) in the
309 : * query.
310 : */
311 4 : gcv.first_item = GETQUERY(query);
312 4 : gcv.check = check;
313 4 : gcv.map_item_operand = (int *) (extra_data[0]);
314 4 : gcv.need_recheck = recheck;
315 :
316 4 : res = TS_execute(GETQUERY(query),
317 : &gcv,
318 : TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
319 : checkcondition_gin);
320 : }
321 :
322 4 : PG_RETURN_BOOL(res);
323 : }
324 :
325 : Datum
326 1829 : gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
327 : {
328 1829 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
329 :
330 : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
331 1829 : TSQuery query = PG_GETARG_TSQUERY(2);
332 :
333 : /* int32 nkeys = PG_GETARG_INT32(3); */
334 1829 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
335 1829 : GinTernaryValue res = GIN_FALSE;
336 : bool recheck;
337 :
338 : /* Initially assume query doesn't require recheck */
339 1829 : recheck = false;
340 :
341 1829 : if (query->size > 0)
342 : {
343 : GinChkVal gcv;
344 :
345 : /*
346 : * check-parameter array has one entry for each value (operand) in the
347 : * query.
348 : */
349 1829 : gcv.first_item = GETQUERY(query);
350 1829 : gcv.check = check;
351 1829 : gcv.map_item_operand = (int *) (extra_data[0]);
352 1829 : gcv.need_recheck = &recheck;
353 :
354 1829 : res = TS_execute_ternary(&gcv, GETQUERY(query), false);
355 :
356 1829 : if (res == GIN_TRUE && recheck)
357 2 : res = GIN_MAYBE;
358 : }
359 :
360 1829 : PG_RETURN_GIN_TERNARY_VALUE(res);
361 : }
362 :
363 : /*
364 : * Formerly, gin_extract_tsvector had only two arguments. Now it has three,
365 : * but we still need a pg_proc entry with two args to support reloading
366 : * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility
367 : * function should go away eventually. (Note: you might say "hey, but the
368 : * code above is only *using* two args, so let's just declare it that way".
369 : * If you try that you'll find the opr_sanity regression test complains.)
370 : */
371 : Datum
372 0 : gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
373 : {
374 0 : if (PG_NARGS() < 3) /* should not happen */
375 0 : elog(ERROR, "gin_extract_tsvector requires three arguments");
376 0 : return gin_extract_tsvector(fcinfo);
377 : }
378 :
379 : /*
380 : * Likewise, we need a stub version of gin_extract_tsquery declared with
381 : * only five arguments.
382 : */
383 : Datum
384 0 : gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
385 : {
386 0 : if (PG_NARGS() < 7) /* should not happen */
387 0 : elog(ERROR, "gin_extract_tsquery requires seven arguments");
388 0 : return gin_extract_tsquery(fcinfo);
389 : }
390 :
391 : /*
392 : * Likewise, we need a stub version of gin_tsquery_consistent declared with
393 : * only six arguments.
394 : */
395 : Datum
396 0 : gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
397 : {
398 0 : if (PG_NARGS() < 8) /* should not happen */
399 0 : elog(ERROR, "gin_tsquery_consistent requires eight arguments");
400 0 : return gin_tsquery_consistent(fcinfo);
401 : }
402 :
403 : /*
404 : * Likewise, a stub version of gin_extract_tsquery declared with argument
405 : * types that are no longer considered appropriate.
406 : */
407 : Datum
408 0 : gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
409 : {
410 0 : return gin_extract_tsquery(fcinfo);
411 : }
412 :
413 : /*
414 : * Likewise, a stub version of gin_tsquery_consistent declared with argument
415 : * types that are no longer considered appropriate.
416 : */
417 : Datum
418 0 : gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
419 : {
420 0 : return gin_tsquery_consistent(fcinfo);
421 : }
|