Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsvector_op.c
4 : * operations over tsvector
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsvector_op.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <limits.h>
17 :
18 : #include "access/htup_details.h"
19 : #include "catalog/namespace.h"
20 : #include "catalog/pg_type.h"
21 : #include "commands/trigger.h"
22 : #include "executor/spi.h"
23 : #include "funcapi.h"
24 : #include "mb/pg_wchar.h"
25 : #include "miscadmin.h"
26 : #include "parser/parse_coerce.h"
27 : #include "tsearch/ts_utils.h"
28 : #include "utils/builtins.h"
29 : #include "utils/lsyscache.h"
30 : #include "utils/regproc.h"
31 : #include "utils/rel.h"
32 :
33 :
/*
 * State handed to the per-operand check callback (see checkclass_str).
 *
 * "values" points at the tsvector's lexeme storage area; entry->pos offsets
 * are resolved relative to it.
 * NOTE(review): arrb/arre presumably delimit the tsvector's WordEntry array
 * (begin/end) and "operand" presumably points at the query's operand strings
 * -- confirm against the code that fills this struct.
 */
typedef struct
{
	WordEntry  *arrb;
	WordEntry  *arre;
	char	   *values;
	char	   *operand;
} CHKVAL;
41 :
42 :
/*
 * Node of a binary tree (left/right links) carrying per-lexeme counters.
 *
 * The lexeme bytes are stored inline after the fixed-size header, in the
 * flexible array member; lenlexeme holds their length.
 * NOTE(review): ndoc/nentry presumably count documents and occurrences of
 * the lexeme -- confirm against the code that updates them.
 */
typedef struct StatEntry
{
	uint32		ndoc;			/* zero indicates that we were already here
								 * while walking through the tree */
	uint32		nentry;
	struct StatEntry *left;
	struct StatEntry *right;
	uint32		lenlexeme;
	char		lexeme[FLEXIBLE_ARRAY_MEMBER];
} StatEntry;

/* Size of the fixed part of a StatEntry, i.e. everything before lexeme[] */
#define STATENTRYHDRSZ	(offsetof(StatEntry, lexeme))
55 :
/*
 * Container for a tree of StatEntry nodes plus traversal bookkeeping.
 *
 * NOTE(review): the field roles below are inferred from their names only
 * (weight = weight filter, maxdepth = deepest tree level, stack/stackpos =
 * explicit traversal stack, root = tree root) -- confirm against the users
 * of this struct.
 */
typedef struct
{
	int32		weight;

	uint32		maxdepth;

	StatEntry **stack;
	uint32		stackpos;

	StatEntry  *root;
} TSVectorStat;

/*
 * NOTE(review): this macro refers to a member named "data", which the struct
 * above does not declare.  It only compiles as long as nothing expands it;
 * it looks like a stale leftover -- confirm and consider removing.
 */
#define STATHDRSIZE (offsetof(TSVectorStat, data))
69 :
/*
 * Forward declarations of file-local routines.  tsvector_bsearch is called
 * by the setweight/delete functions before its definition appears.
 * NOTE(review): tsvector_update_trigger is presumably defined later in this
 * file -- its definition is outside the reviewed section.
 */
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
72 :
73 : /*
74 : * Order: haspos, len, word, for all positions (pos, weight)
75 : */
76 : static int
77 0 : silly_cmp_tsvector(const TSVector a, const TSVector b)
78 : {
79 0 : if (VARSIZE(a) < VARSIZE(b))
80 0 : return -1;
81 0 : else if (VARSIZE(a) > VARSIZE(b))
82 0 : return 1;
83 0 : else if (a->size < b->size)
84 0 : return -1;
85 0 : else if (a->size > b->size)
86 0 : return 1;
87 : else
88 : {
89 0 : WordEntry *aptr = ARRPTR(a);
90 0 : WordEntry *bptr = ARRPTR(b);
91 0 : int i = 0;
92 : int res;
93 :
94 :
95 0 : for (i = 0; i < a->size; i++)
96 : {
97 0 : if (aptr->haspos != bptr->haspos)
98 : {
99 0 : return (aptr->haspos > bptr->haspos) ? -1 : 1;
100 : }
101 0 : else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
102 : {
103 0 : return res;
104 : }
105 0 : else if (aptr->haspos)
106 : {
107 0 : WordEntryPos *ap = POSDATAPTR(a, aptr);
108 0 : WordEntryPos *bp = POSDATAPTR(b, bptr);
109 : int j;
110 :
111 0 : if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
112 0 : return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
113 :
114 0 : for (j = 0; j < POSDATALEN(a, aptr); j++)
115 : {
116 0 : if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
117 : {
118 0 : return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
119 : }
120 0 : else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
121 : {
122 0 : return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
123 : }
124 0 : ap++, bp++;
125 : }
126 : }
127 :
128 0 : aptr++;
129 0 : bptr++;
130 : }
131 : }
132 :
133 0 : return 0;
134 : }
135 :
/*
 * Generate a function named tsvector_<type>.
 *
 * The generated function fetches two tsvector arguments, compares them with
 * silly_cmp_tsvector(), releases any detoasted copies, and returns the value
 * of "res action 0" through PG_RETURN_<ret>.
 *
 * The trailing "extern int no_such_variable" declaration exists only to
 * absorb the semicolon written after each macro invocation.
 */
#define TSVECTORCMPFUNC( type, action, ret )			\
Datum													\
tsvector_##type(PG_FUNCTION_ARGS)						\
{														\
	TSVector	a = PG_GETARG_TSVECTOR(0);				\
	TSVector	b = PG_GETARG_TSVECTOR(1);				\
	int			res = silly_cmp_tsvector(a, b);			\
	PG_FREE_IF_COPY(a,0);								\
	PG_FREE_IF_COPY(b,1);								\
	PG_RETURN_##ret( res action 0 );					\
}	\
/* keep compiler quiet - no extra ; */					\
extern int no_such_variable

/* Boolean comparison operators: the result is "res <op> 0" */
TSVECTORCMPFUNC(lt, <, BOOL);
TSVECTORCMPFUNC(le, <=, BOOL);
TSVECTORCMPFUNC(eq, ==, BOOL);
TSVECTORCMPFUNC(ge, >=, BOOL);
TSVECTORCMPFUNC(gt, >, BOOL);
TSVECTORCMPFUNC(ne, !=, BOOL);
/* Three-way comparison: "res + 0" passes the raw comparator result through */
TSVECTORCMPFUNC(cmp, +, INT32);
157 :
158 : Datum
159 6 : tsvector_strip(PG_FUNCTION_ARGS)
160 : {
161 6 : TSVector in = PG_GETARG_TSVECTOR(0);
162 : TSVector out;
163 : int i,
164 6 : len = 0;
165 6 : WordEntry *arrin = ARRPTR(in),
166 : *arrout;
167 : char *cur;
168 :
169 25 : for (i = 0; i < in->size; i++)
170 19 : len += arrin[i].len;
171 :
172 6 : len = CALCDATASIZE(in->size, len);
173 6 : out = (TSVector) palloc0(len);
174 6 : SET_VARSIZE(out, len);
175 6 : out->size = in->size;
176 6 : arrout = ARRPTR(out);
177 6 : cur = STRPTR(out);
178 25 : for (i = 0; i < in->size; i++)
179 : {
180 19 : memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
181 19 : arrout[i].haspos = 0;
182 19 : arrout[i].len = arrin[i].len;
183 19 : arrout[i].pos = cur - STRPTR(out);
184 19 : cur += arrout[i].len;
185 : }
186 :
187 6 : PG_FREE_IF_COPY(in, 0);
188 6 : PG_RETURN_POINTER(out);
189 : }
190 :
191 : Datum
192 1 : tsvector_length(PG_FUNCTION_ARGS)
193 : {
194 1 : TSVector in = PG_GETARG_TSVECTOR(0);
195 1 : int32 ret = in->size;
196 :
197 1 : PG_FREE_IF_COPY(in, 0);
198 1 : PG_RETURN_INT32(ret);
199 : }
200 :
201 : Datum
202 2 : tsvector_setweight(PG_FUNCTION_ARGS)
203 : {
204 2 : TSVector in = PG_GETARG_TSVECTOR(0);
205 2 : char cw = PG_GETARG_CHAR(1);
206 : TSVector out;
207 : int i,
208 : j;
209 : WordEntry *entry;
210 : WordEntryPos *p;
211 2 : int w = 0;
212 :
213 2 : switch (cw)
214 : {
215 : case 'A':
216 : case 'a':
217 0 : w = 3;
218 0 : break;
219 : case 'B':
220 : case 'b':
221 0 : w = 2;
222 0 : break;
223 : case 'C':
224 : case 'c':
225 2 : w = 1;
226 2 : break;
227 : case 'D':
228 : case 'd':
229 0 : w = 0;
230 0 : break;
231 : default:
232 : /* internal error */
233 0 : elog(ERROR, "unrecognized weight: %d", cw);
234 : }
235 :
236 2 : out = (TSVector) palloc(VARSIZE(in));
237 2 : memcpy(out, in, VARSIZE(in));
238 2 : entry = ARRPTR(out);
239 2 : i = out->size;
240 12 : while (i--)
241 : {
242 8 : if ((j = POSDATALEN(out, entry)) != 0)
243 : {
244 8 : p = POSDATAPTR(out, entry);
245 36 : while (j--)
246 : {
247 20 : WEP_SETWEIGHT(*p, w);
248 20 : p++;
249 : }
250 : }
251 8 : entry++;
252 : }
253 :
254 2 : PG_FREE_IF_COPY(in, 0);
255 2 : PG_RETURN_POINTER(out);
256 : }
257 :
258 : /*
259 : * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
260 : *
261 : * Assign weight w to elements of tsin that are listed in lexemes.
262 : */
263 : Datum
264 5 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
265 : {
266 5 : TSVector tsin = PG_GETARG_TSVECTOR(0);
267 5 : char char_weight = PG_GETARG_CHAR(1);
268 5 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
269 :
270 : TSVector tsout;
271 : int i,
272 : j,
273 : nlexemes,
274 : weight;
275 : WordEntry *entry;
276 : Datum *dlexemes;
277 : bool *nulls;
278 :
279 5 : switch (char_weight)
280 : {
281 : case 'A':
282 : case 'a':
283 0 : weight = 3;
284 0 : break;
285 : case 'B':
286 : case 'b':
287 0 : weight = 2;
288 0 : break;
289 : case 'C':
290 : case 'c':
291 5 : weight = 1;
292 5 : break;
293 : case 'D':
294 : case 'd':
295 0 : weight = 0;
296 0 : break;
297 : default:
298 : /* internal error */
299 0 : elog(ERROR, "unrecognized weight: %c", char_weight);
300 : }
301 :
302 5 : tsout = (TSVector) palloc(VARSIZE(tsin));
303 5 : memcpy(tsout, tsin, VARSIZE(tsin));
304 5 : entry = ARRPTR(tsout);
305 :
306 5 : deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
307 : &dlexemes, &nulls, &nlexemes);
308 :
309 : /*
310 : * Assuming that lexemes array is significantly shorter than tsvector we
311 : * can iterate through lexemes performing binary search of each lexeme
312 : * from lexemes in tsvector.
313 : */
314 13 : for (i = 0; i < nlexemes; i++)
315 : {
316 : char *lex;
317 : int lex_len,
318 : lex_pos;
319 :
320 9 : if (nulls[i])
321 1 : ereport(ERROR,
322 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
323 : errmsg("lexeme array may not contain nulls")));
324 :
325 8 : lex = VARDATA(dlexemes[i]);
326 8 : lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
327 8 : lex_pos = tsvector_bsearch(tsout, lex, lex_len);
328 :
329 8 : if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
330 : {
331 4 : WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
332 :
333 17 : while (j--)
334 : {
335 9 : WEP_SETWEIGHT(*p, weight);
336 9 : p++;
337 : }
338 : }
339 : }
340 :
341 4 : PG_FREE_IF_COPY(tsin, 0);
342 4 : PG_FREE_IF_COPY(lexemes, 2);
343 :
344 4 : PG_RETURN_POINTER(tsout);
345 : }
346 :
/*
 * Compare the lexemes of two WordEntry items by tsvector ordering rules.
 * pa/pb are the base addresses of the respective lexeme storage areas and
 * a/b the entries whose pos/len locate the lexeme within them.  Always does
 * a full (non-prefix) comparison.
 */
#define compareEntry(pa, a, pb, b) \
	tsCompareString((pa) + (a)->pos, (a)->len,	\
					(pb) + (b)->pos, (b)->len,	\
					false)
351 :
352 : /*
353 : * Add positions from src to dest after offsetting them by maxpos.
354 : * Return the number added (might be less than expected due to overflow)
355 : */
356 : static int32
357 2 : add_pos(TSVector src, WordEntry *srcptr,
358 : TSVector dest, WordEntry *destptr,
359 : int32 maxpos)
360 : {
361 2 : uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
362 : int i;
363 2 : uint16 slen = POSDATALEN(src, srcptr),
364 : startlen;
365 2 : WordEntryPos *spos = POSDATAPTR(src, srcptr),
366 2 : *dpos = POSDATAPTR(dest, destptr);
367 :
368 2 : if (!destptr->haspos)
369 0 : *clen = 0;
370 :
371 2 : startlen = *clen;
372 6 : for (i = 0;
373 8 : i < slen && *clen < MAXNUMPOS &&
374 3 : (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
375 2 : i++)
376 : {
377 2 : WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
378 2 : WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
379 2 : (*clen)++;
380 : }
381 :
382 2 : if (*clen != startlen)
383 2 : destptr->haspos = 1;
384 2 : return *clen - startlen;
385 : }
386 :
387 : /*
388 : * Perform binary search of given lexeme in TSVector.
389 : * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
390 : * found.
391 : */
392 : static int
393 33 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
394 : {
395 33 : WordEntry *arrin = ARRPTR(tsv);
396 33 : int StopLow = 0,
397 33 : StopHigh = tsv->size,
398 : StopMiddle,
399 : cmp;
400 :
401 117 : while (StopLow < StopHigh)
402 : {
403 76 : StopMiddle = (StopLow + StopHigh) / 2;
404 :
405 152 : cmp = tsCompareString(lexeme, lexeme_len,
406 76 : STRPTR(tsv) + arrin[StopMiddle].pos,
407 76 : arrin[StopMiddle].len,
408 : false);
409 :
410 76 : if (cmp < 0)
411 32 : StopHigh = StopMiddle;
412 44 : else if (cmp > 0)
413 19 : StopLow = StopMiddle + 1;
414 : else /* found it */
415 25 : return StopMiddle;
416 : }
417 :
418 8 : return -1;
419 : }
420 :
421 : /*
422 : * qsort comparator functions
423 : */
424 :
/*
 * qsort comparator for plain ints, ascending.
 * Returns -1, 0 or 1; written without subtraction so it cannot overflow.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int	lhs = *(const int *) va;
	const int	rhs = *(const int *) vb;

	return (lhs > rhs) - (lhs < rhs);
}
435 :
436 : static int
437 17 : compare_text_lexemes(const void *va, const void *vb)
438 : {
439 17 : Datum a = *((const Datum *) va);
440 17 : Datum b = *((const Datum *) vb);
441 17 : char *alex = VARDATA_ANY(a);
442 17 : int alex_len = VARSIZE_ANY_EXHDR(a);
443 17 : char *blex = VARDATA_ANY(b);
444 17 : int blex_len = VARSIZE_ANY_EXHDR(b);
445 :
446 17 : return tsCompareString(alex, alex_len, blex, blex_len, false);
447 : }
448 :
449 : /*
450 : * Internal routine to delete lexemes from TSVector by array of offsets.
451 : *
452 : * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
453 : * int indices_count -- size of that array
454 : *
455 : * Returns new TSVector without given lexemes along with their positions
456 : * and weights.
457 : */
458 : static TSVector
459 10 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
460 : int indices_count)
461 : {
462 : TSVector tsout;
463 10 : WordEntry *arrin = ARRPTR(tsv),
464 : *arrout;
465 10 : char *data = STRPTR(tsv),
466 : *dataout;
467 : int i, /* index in arrin */
468 : j, /* index in arrout */
469 : k, /* index in indices_to_delete */
470 : curoff; /* index in dataout area */
471 :
472 : /*
473 : * Sort the filter array to simplify membership checks below. Also, get
474 : * rid of any duplicate entries, so that we can assume that indices_count
475 : * is exactly equal to the number of lexemes that will be removed.
476 : */
477 10 : if (indices_count > 1)
478 : {
479 : int kp;
480 :
481 4 : qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
482 4 : kp = 0;
483 9 : for (k = 1; k < indices_count; k++)
484 : {
485 5 : if (indices_to_delete[k] != indices_to_delete[kp])
486 4 : indices_to_delete[++kp] = indices_to_delete[k];
487 : }
488 4 : indices_count = ++kp;
489 : }
490 :
491 : /*
492 : * Here we overestimate tsout size, since we don't know how much space is
493 : * used by the deleted lexeme(s). We will set exact size below.
494 : */
495 10 : tsout = (TSVector) palloc0(VARSIZE(tsv));
496 :
497 : /* This count must be correct because STRPTR(tsout) relies on it. */
498 10 : tsout->size = tsv->size - indices_count;
499 :
500 : /*
501 : * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
502 : */
503 10 : arrout = ARRPTR(tsout);
504 10 : dataout = STRPTR(tsout);
505 10 : curoff = 0;
506 60 : for (i = j = k = 0; i < tsv->size; i++)
507 : {
508 : /*
509 : * If current i is present in indices_to_delete, skip this lexeme.
510 : * Since indices_to_delete is already sorted, we only need to check
511 : * the current (k'th) entry.
512 : */
513 50 : if (k < indices_count && i == indices_to_delete[k])
514 : {
515 14 : k++;
516 14 : continue;
517 : }
518 :
519 : /* Copy lexeme and its positions and weights */
520 36 : memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
521 36 : arrout[j].haspos = arrin[i].haspos;
522 36 : arrout[j].len = arrin[i].len;
523 36 : arrout[j].pos = curoff;
524 36 : curoff += arrin[i].len;
525 36 : if (arrin[i].haspos)
526 : {
527 26 : int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
528 : + sizeof(uint16);
529 :
530 26 : curoff = SHORTALIGN(curoff);
531 78 : memcpy(dataout + curoff,
532 52 : STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
533 : len);
534 26 : curoff += len;
535 : }
536 :
537 36 : j++;
538 : }
539 :
540 : /*
541 : * k should now be exactly equal to indices_count. If it isn't then the
542 : * caller provided us with indices outside of [0, tsv->size) range and
543 : * estimation of tsout's size is wrong.
544 : */
545 10 : Assert(k == indices_count);
546 :
547 10 : SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
548 10 : return tsout;
549 : }
550 :
551 : /*
552 : * Delete given lexeme from tsvector.
553 : * Implementation of user-level ts_delete(tsvector, text).
554 : */
555 : Datum
556 6 : tsvector_delete_str(PG_FUNCTION_ARGS)
557 : {
558 6 : TSVector tsin = PG_GETARG_TSVECTOR(0),
559 : tsout;
560 6 : text *tlexeme = PG_GETARG_TEXT_PP(1);
561 6 : char *lexeme = VARDATA_ANY(tlexeme);
562 6 : int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
563 : skip_index;
564 :
565 6 : if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
566 2 : PG_RETURN_POINTER(tsin);
567 :
568 4 : tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
569 :
570 4 : PG_FREE_IF_COPY(tsin, 0);
571 4 : PG_FREE_IF_COPY(tlexeme, 1);
572 4 : PG_RETURN_POINTER(tsout);
573 : }
574 :
575 : /*
576 : * Delete given array of lexemes from tsvector.
577 : * Implementation of user-level ts_delete(tsvector, text[]).
578 : */
579 : Datum
580 7 : tsvector_delete_arr(PG_FUNCTION_ARGS)
581 : {
582 7 : TSVector tsin = PG_GETARG_TSVECTOR(0),
583 : tsout;
584 7 : ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
585 : int i,
586 : nlex,
587 : skip_count,
588 : *skip_indices;
589 : Datum *dlexemes;
590 : bool *nulls;
591 :
592 7 : deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
593 : &dlexemes, &nulls, &nlex);
594 :
595 : /*
596 : * In typical use case array of lexemes to delete is relatively small. So
597 : * here we optimize things for that scenario: iterate through lexarr
598 : * performing binary search of each lexeme from lexarr in tsvector.
599 : */
600 7 : skip_indices = palloc0(nlex * sizeof(int));
601 26 : for (i = skip_count = 0; i < nlex; i++)
602 : {
603 : char *lex;
604 : int lex_len,
605 : lex_pos;
606 :
607 20 : if (nulls[i])
608 1 : ereport(ERROR,
609 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
610 : errmsg("lexeme array may not contain nulls")));
611 :
612 19 : lex = VARDATA(dlexemes[i]);
613 19 : lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
614 19 : lex_pos = tsvector_bsearch(tsin, lex, lex_len);
615 :
616 19 : if (lex_pos >= 0)
617 13 : skip_indices[skip_count++] = lex_pos;
618 : }
619 :
620 6 : tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
621 :
622 6 : pfree(skip_indices);
623 6 : PG_FREE_IF_COPY(tsin, 0);
624 6 : PG_FREE_IF_COPY(lexemes, 1);
625 :
626 6 : PG_RETURN_POINTER(tsout);
627 : }
628 :
629 : /*
630 : * Expand tsvector as table with following columns:
631 : * lexeme: lexeme text
632 : * positions: integer array of lexeme positions
633 : * weights: char array of weights corresponding to positions
634 : */
635 : Datum
636 30 : tsvector_unnest(PG_FUNCTION_ARGS)
637 : {
638 : FuncCallContext *funcctx;
639 : TSVector tsin;
640 :
641 30 : if (SRF_IS_FIRSTCALL())
642 : {
643 : MemoryContext oldcontext;
644 : TupleDesc tupdesc;
645 :
646 5 : funcctx = SRF_FIRSTCALL_INIT();
647 5 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
648 :
649 5 : tupdesc = CreateTemplateTupleDesc(3, false);
650 5 : TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
651 : TEXTOID, -1, 0);
652 5 : TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
653 : INT2ARRAYOID, -1, 0);
654 5 : TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
655 : TEXTARRAYOID, -1, 0);
656 5 : funcctx->tuple_desc = BlessTupleDesc(tupdesc);
657 :
658 5 : funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
659 :
660 5 : MemoryContextSwitchTo(oldcontext);
661 : }
662 :
663 30 : funcctx = SRF_PERCALL_SETUP();
664 30 : tsin = (TSVector) funcctx->user_fctx;
665 :
666 30 : if (funcctx->call_cntr < tsin->size)
667 : {
668 25 : WordEntry *arrin = ARRPTR(tsin);
669 25 : char *data = STRPTR(tsin);
670 : HeapTuple tuple;
671 : int j,
672 25 : i = funcctx->call_cntr;
673 25 : bool nulls[] = {false, false, false};
674 : Datum values[3];
675 :
676 25 : values[0] = PointerGetDatum(
677 : cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
678 : );
679 :
680 25 : if (arrin[i].haspos)
681 : {
682 : WordEntryPosVector *posv;
683 : Datum *positions;
684 : Datum *weights;
685 : char weight;
686 :
687 : /*
688 : * Internally tsvector stores position and weight in the same
689 : * uint16 (2 bits for weight, 14 for position). Here we extract
690 : * that in two separate arrays.
691 : */
692 15 : posv = _POSVECPTR(tsin, arrin + i);
693 15 : positions = palloc(posv->npos * sizeof(Datum));
694 15 : weights = palloc(posv->npos * sizeof(Datum));
695 42 : for (j = 0; j < posv->npos; j++)
696 : {
697 27 : positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
698 27 : weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
699 27 : weights[j] = PointerGetDatum(
700 : cstring_to_text_with_len(&weight, 1)
701 : );
702 : }
703 :
704 15 : values[1] = PointerGetDatum(
705 : construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
706 15 : values[2] = PointerGetDatum(
707 : construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
708 : }
709 : else
710 : {
711 10 : nulls[1] = nulls[2] = true;
712 : }
713 :
714 25 : tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
715 25 : SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
716 : }
717 : else
718 : {
719 5 : pfree(tsin);
720 5 : SRF_RETURN_DONE(funcctx);
721 : }
722 : }
723 :
724 : /*
725 : * Convert tsvector to array of lexemes.
726 : */
727 : Datum
728 2 : tsvector_to_array(PG_FUNCTION_ARGS)
729 : {
730 2 : TSVector tsin = PG_GETARG_TSVECTOR(0);
731 2 : WordEntry *arrin = ARRPTR(tsin);
732 : Datum *elements;
733 : int i;
734 : ArrayType *array;
735 :
736 2 : elements = palloc(tsin->size * sizeof(Datum));
737 :
738 12 : for (i = 0; i < tsin->size; i++)
739 : {
740 10 : elements[i] = PointerGetDatum(
741 : cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
742 : );
743 : }
744 :
745 2 : array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
746 :
747 2 : pfree(elements);
748 2 : PG_FREE_IF_COPY(tsin, 0);
749 2 : PG_RETURN_POINTER(array);
750 : }
751 :
752 : /*
753 : * Build tsvector from array of lexemes.
754 : */
755 : Datum
756 3 : array_to_tsvector(PG_FUNCTION_ARGS)
757 : {
758 3 : ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
759 : TSVector tsout;
760 : Datum *dlexemes;
761 : WordEntry *arrout;
762 : bool *nulls;
763 : int nitems,
764 : i,
765 : j,
766 : tslen,
767 3 : datalen = 0;
768 : char *cur;
769 :
770 3 : deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
771 :
772 : /* Reject nulls (maybe we should just ignore them, instead?) */
773 16 : for (i = 0; i < nitems; i++)
774 : {
775 14 : if (nulls[i])
776 1 : ereport(ERROR,
777 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
778 : errmsg("lexeme array may not contain nulls")));
779 : }
780 :
781 : /* Sort and de-dup, because this is required for a valid tsvector. */
782 2 : if (nitems > 1)
783 : {
784 2 : qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
785 2 : j = 0;
786 9 : for (i = 1; i < nitems; i++)
787 : {
788 7 : if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
789 6 : dlexemes[++j] = dlexemes[i];
790 : }
791 2 : nitems = ++j;
792 : }
793 :
794 : /* Calculate space needed for surviving lexemes. */
795 10 : for (i = 0; i < nitems; i++)
796 8 : datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
797 2 : tslen = CALCDATASIZE(nitems, datalen);
798 :
799 : /* Allocate and fill tsvector. */
800 2 : tsout = (TSVector) palloc0(tslen);
801 2 : SET_VARSIZE(tsout, tslen);
802 2 : tsout->size = nitems;
803 :
804 2 : arrout = ARRPTR(tsout);
805 2 : cur = STRPTR(tsout);
806 10 : for (i = 0; i < nitems; i++)
807 : {
808 8 : char *lex = VARDATA(dlexemes[i]);
809 8 : int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
810 :
811 8 : memcpy(cur, lex, lex_len);
812 8 : arrout[i].haspos = 0;
813 8 : arrout[i].len = lex_len;
814 8 : arrout[i].pos = cur - STRPTR(tsout);
815 8 : cur += lex_len;
816 : }
817 :
818 2 : PG_FREE_IF_COPY(v, 0);
819 2 : PG_RETURN_POINTER(tsout);
820 : }
821 :
822 : /*
823 : * ts_filter(): keep only lexemes with given weights in tsvector.
824 : */
825 : Datum
826 3 : tsvector_filter(PG_FUNCTION_ARGS)
827 : {
828 3 : TSVector tsin = PG_GETARG_TSVECTOR(0),
829 : tsout;
830 3 : ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1);
831 3 : WordEntry *arrin = ARRPTR(tsin),
832 : *arrout;
833 3 : char *datain = STRPTR(tsin),
834 : *dataout;
835 : Datum *dweights;
836 : bool *nulls;
837 : int nweights;
838 : int i,
839 : j;
840 3 : int cur_pos = 0;
841 3 : char mask = 0;
842 :
843 3 : deconstruct_array(weights, CHAROID, 1, true, 'c',
844 : &dweights, &nulls, &nweights);
845 :
846 7 : for (i = 0; i < nweights; i++)
847 : {
848 : char char_weight;
849 :
850 5 : if (nulls[i])
851 1 : ereport(ERROR,
852 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
853 : errmsg("weight array may not contain nulls")));
854 :
855 4 : char_weight = DatumGetChar(dweights[i]);
856 4 : switch (char_weight)
857 : {
858 : case 'A':
859 : case 'a':
860 3 : mask = mask | 8;
861 3 : break;
862 : case 'B':
863 : case 'b':
864 1 : mask = mask | 4;
865 1 : break;
866 : case 'C':
867 : case 'c':
868 0 : mask = mask | 2;
869 0 : break;
870 : case 'D':
871 : case 'd':
872 0 : mask = mask | 1;
873 0 : break;
874 : default:
875 0 : ereport(ERROR,
876 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
877 : errmsg("unrecognized weight: \"%c\"", char_weight)));
878 : }
879 : }
880 :
881 2 : tsout = (TSVector) palloc0(VARSIZE(tsin));
882 2 : tsout->size = tsin->size;
883 2 : arrout = ARRPTR(tsout);
884 2 : dataout = STRPTR(tsout);
885 :
886 18 : for (i = j = 0; i < tsin->size; i++)
887 : {
888 : WordEntryPosVector *posvin,
889 : *posvout;
890 16 : int npos = 0;
891 : int k;
892 :
893 16 : if (!arrin[i].haspos)
894 5 : continue;
895 :
896 11 : posvin = _POSVECPTR(tsin, arrin + i);
897 11 : posvout = (WordEntryPosVector *)
898 11 : (dataout + SHORTALIGN(cur_pos + arrin[i].len));
899 :
900 22 : for (k = 0; k < posvin->npos; k++)
901 : {
902 11 : if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
903 5 : posvout->pos[npos++] = posvin->pos[k];
904 : }
905 :
906 : /* if no satisfactory positions found, skip lexeme */
907 11 : if (!npos)
908 6 : continue;
909 :
910 5 : arrout[j].haspos = true;
911 5 : arrout[j].len = arrin[i].len;
912 5 : arrout[j].pos = cur_pos;
913 :
914 5 : memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
915 5 : posvout->npos = npos;
916 5 : cur_pos += SHORTALIGN(arrin[i].len);
917 5 : cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
918 : sizeof(uint16);
919 5 : j++;
920 : }
921 :
922 2 : tsout->size = j;
923 2 : if (dataout != STRPTR(tsout))
924 2 : memmove(STRPTR(tsout), dataout, cur_pos);
925 :
926 2 : SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
927 :
928 2 : PG_FREE_IF_COPY(tsin, 0);
929 2 : PG_RETURN_POINTER(tsout);
930 : }
931 :
932 : Datum
933 2 : tsvector_concat(PG_FUNCTION_ARGS)
934 : {
935 2 : TSVector in1 = PG_GETARG_TSVECTOR(0);
936 2 : TSVector in2 = PG_GETARG_TSVECTOR(1);
937 : TSVector out;
938 : WordEntry *ptr;
939 : WordEntry *ptr1,
940 : *ptr2;
941 : WordEntryPos *p;
942 2 : int maxpos = 0,
943 : i,
944 : j,
945 : i1,
946 : i2,
947 : dataoff,
948 : output_bytes,
949 : output_size;
950 : char *data,
951 : *data1,
952 : *data2;
953 :
954 : /* Get max position in in1; we'll need this to offset in2's positions */
955 2 : ptr = ARRPTR(in1);
956 2 : i = in1->size;
957 7 : while (i--)
958 : {
959 3 : if ((j = POSDATALEN(in1, ptr)) != 0)
960 : {
961 3 : p = POSDATAPTR(in1, ptr);
962 9 : while (j--)
963 : {
964 3 : if (WEP_GETPOS(*p) > maxpos)
965 2 : maxpos = WEP_GETPOS(*p);
966 3 : p++;
967 : }
968 : }
969 3 : ptr++;
970 : }
971 :
972 2 : ptr1 = ARRPTR(in1);
973 2 : ptr2 = ARRPTR(in2);
974 2 : data1 = STRPTR(in1);
975 2 : data2 = STRPTR(in2);
976 2 : i1 = in1->size;
977 2 : i2 = in2->size;
978 :
979 : /*
980 : * Conservative estimate of space needed. We might need all the data in
981 : * both inputs, and conceivably add a pad byte before position data for
982 : * each item where there was none before.
983 : */
984 2 : output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
985 :
986 2 : out = (TSVector) palloc0(output_bytes);
987 2 : SET_VARSIZE(out, output_bytes);
988 :
989 : /*
990 : * We must make out->size valid so that STRPTR(out) is sensible. We'll
991 : * collapse out any unused space at the end.
992 : */
993 2 : out->size = in1->size + in2->size;
994 :
995 2 : ptr = ARRPTR(out);
996 2 : data = STRPTR(out);
997 2 : dataoff = 0;
998 7 : while (i1 && i2)
999 : {
1000 3 : int cmp = compareEntry(data1, ptr1, data2, ptr2);
1001 :
1002 3 : if (cmp < 0)
1003 : { /* in1 first */
1004 1 : ptr->haspos = ptr1->haspos;
1005 1 : ptr->len = ptr1->len;
1006 1 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1007 1 : ptr->pos = dataoff;
1008 1 : dataoff += ptr1->len;
1009 1 : if (ptr->haspos)
1010 : {
1011 1 : dataoff = SHORTALIGN(dataoff);
1012 1 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1013 1 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1014 : }
1015 :
1016 1 : ptr++;
1017 1 : ptr1++;
1018 1 : i1--;
1019 : }
1020 2 : else if (cmp > 0)
1021 : { /* in2 first */
1022 1 : ptr->haspos = ptr2->haspos;
1023 1 : ptr->len = ptr2->len;
1024 1 : memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1025 1 : ptr->pos = dataoff;
1026 1 : dataoff += ptr2->len;
1027 1 : if (ptr->haspos)
1028 : {
1029 0 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1030 :
1031 0 : if (addlen == 0)
1032 0 : ptr->haspos = 0;
1033 : else
1034 : {
1035 0 : dataoff = SHORTALIGN(dataoff);
1036 0 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1037 : }
1038 : }
1039 :
1040 1 : ptr++;
1041 1 : ptr2++;
1042 1 : i2--;
1043 : }
1044 : else
1045 : {
1046 1 : ptr->haspos = ptr1->haspos | ptr2->haspos;
1047 1 : ptr->len = ptr1->len;
1048 1 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1049 1 : ptr->pos = dataoff;
1050 1 : dataoff += ptr1->len;
1051 1 : if (ptr->haspos)
1052 : {
1053 1 : if (ptr1->haspos)
1054 : {
1055 1 : dataoff = SHORTALIGN(dataoff);
1056 1 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1057 1 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1058 1 : if (ptr2->haspos)
1059 1 : dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1060 : }
1061 : else /* must have ptr2->haspos */
1062 : {
1063 0 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1064 :
1065 0 : if (addlen == 0)
1066 0 : ptr->haspos = 0;
1067 : else
1068 : {
1069 0 : dataoff = SHORTALIGN(dataoff);
1070 0 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1071 : }
1072 : }
1073 : }
1074 :
1075 1 : ptr++;
1076 1 : ptr1++;
1077 1 : ptr2++;
1078 1 : i1--;
1079 1 : i2--;
1080 : }
1081 : }
1082 :
1083 5 : while (i1)
1084 : {
1085 1 : ptr->haspos = ptr1->haspos;
1086 1 : ptr->len = ptr1->len;
1087 1 : memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1088 1 : ptr->pos = dataoff;
1089 1 : dataoff += ptr1->len;
1090 1 : if (ptr->haspos)
1091 : {
1092 1 : dataoff = SHORTALIGN(dataoff);
1093 1 : memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1094 1 : dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1095 : }
1096 :
1097 1 : ptr++;
1098 1 : ptr1++;
1099 1 : i1--;
1100 : }
1101 :
1102 5 : while (i2)
1103 : {
1104 1 : ptr->haspos = ptr2->haspos;
1105 1 : ptr->len = ptr2->len;
1106 1 : memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1107 1 : ptr->pos = dataoff;
1108 1 : dataoff += ptr2->len;
1109 1 : if (ptr->haspos)
1110 : {
1111 1 : int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1112 :
1113 1 : if (addlen == 0)
1114 0 : ptr->haspos = 0;
1115 : else
1116 : {
1117 1 : dataoff = SHORTALIGN(dataoff);
1118 1 : dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1119 : }
1120 : }
1121 :
1122 1 : ptr++;
1123 1 : ptr2++;
1124 1 : i2--;
1125 : }
1126 :
1127 : /*
1128 : * Instead of checking each offset individually, we check for overflow of
1129 : * pos fields once at the end.
1130 : */
1131 2 : if (dataoff > MAXSTRPOS)
1132 0 : ereport(ERROR,
1133 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1134 : errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1135 :
1136 : /*
1137 : * Adjust sizes (asserting that we didn't overrun the original estimates)
1138 : * and collapse out any unused array entries.
1139 : */
1140 2 : output_size = ptr - ARRPTR(out);
1141 2 : Assert(output_size <= out->size);
1142 2 : out->size = output_size;
1143 2 : if (data != STRPTR(out))
1144 1 : memmove(STRPTR(out), data, dataoff);
1145 2 : output_bytes = CALCDATASIZE(out->size, dataoff);
1146 2 : Assert(output_bytes <= VARSIZE(out));
1147 2 : SET_VARSIZE(out, output_bytes);
1148 :
1149 2 : PG_FREE_IF_COPY(in1, 0);
1150 2 : PG_FREE_IF_COPY(in2, 1);
1151 2 : PG_RETURN_POINTER(out);
1152 : }
1153 :
1154 : /*
1155 : * Compare two strings by tsvector rules.
1156 : *
1157 : * if isPrefix = true then it returns zero value iff b has prefix a
1158 : */
1159 : int32
1160 842914 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1161 : {
1162 : int cmp;
1163 :
1164 842914 : if (lena == 0)
1165 : {
1166 0 : if (prefix)
1167 0 : cmp = 0; /* empty string is prefix of anything */
1168 : else
1169 0 : cmp = (lenb > 0) ? -1 : 0;
1170 : }
1171 842914 : else if (lenb == 0)
1172 : {
1173 0 : cmp = (lena > 0) ? 1 : 0;
1174 : }
1175 : else
1176 : {
1177 842914 : cmp = memcmp(a, b, Min(lena, lenb));
1178 :
1179 842914 : if (prefix)
1180 : {
1181 1680 : if (cmp == 0 && lena > lenb)
1182 0 : cmp = 1; /* a is longer, so not a prefix of b */
1183 : }
1184 841234 : else if (cmp == 0 && lena != lenb)
1185 : {
1186 3291 : cmp = (lena < lenb) ? -1 : 1;
1187 : }
1188 : }
1189 :
1190 842914 : return cmp;
1191 : }
1192 :
1193 : /*
1194 : * Check weight info or/and fill 'data' with the required positions
1195 : */
1196 : static bool
1197 3667 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
1198 : ExecPhraseData *data)
1199 : {
1200 3667 : bool result = false;
1201 :
1202 3667 : if (entry->haspos && (val->weight || data))
1203 132 : {
1204 : WordEntryPosVector *posvec;
1205 :
1206 : /*
1207 : * We can't use the _POSVECPTR macro here because the pointer to the
1208 : * tsvector's lexeme storage is already contained in chkval->values.
1209 : */
1210 132 : posvec = (WordEntryPosVector *)
1211 132 : (chkval->values + SHORTALIGN(entry->pos + entry->len));
1212 :
1213 132 : if (val->weight && data)
1214 0 : {
1215 0 : WordEntryPos *posvec_iter = posvec->pos;
1216 : WordEntryPos *dptr;
1217 :
1218 : /*
1219 : * Filter position information by weights
1220 : */
1221 0 : dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1222 0 : data->allocated = true;
1223 :
1224 : /* Is there a position with a matching weight? */
1225 0 : while (posvec_iter < posvec->pos + posvec->npos)
1226 : {
1227 : /* If true, append this position to the data->pos */
1228 0 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1229 : {
1230 0 : *dptr = WEP_GETPOS(*posvec_iter);
1231 0 : dptr++;
1232 : }
1233 :
1234 0 : posvec_iter++;
1235 : }
1236 :
1237 0 : data->npos = dptr - data->pos;
1238 :
1239 0 : if (data->npos > 0)
1240 0 : result = true;
1241 : }
1242 132 : else if (val->weight)
1243 : {
1244 21 : WordEntryPos *posvec_iter = posvec->pos;
1245 :
1246 : /* Is there a position with a matching weight? */
1247 54 : while (posvec_iter < posvec->pos + posvec->npos)
1248 : {
1249 29 : if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1250 : {
1251 17 : result = true;
1252 17 : break; /* no need to go further */
1253 : }
1254 :
1255 12 : posvec_iter++;
1256 : }
1257 : }
1258 : else /* data != NULL */
1259 : {
1260 111 : data->npos = posvec->npos;
1261 111 : data->pos = posvec->pos;
1262 111 : data->allocated = false;
1263 111 : result = true;
1264 : }
1265 : }
1266 : else
1267 : {
1268 3535 : result = true;
1269 : }
1270 :
1271 3667 : return result;
1272 : }
1273 :
1274 : /*
1275 : * Removes duplicate pos entries. We can't use uniquePos() from
1276 : * tsvector.c because array might be longer than MAXENTRYPOS
1277 : *
1278 : * Returns new length.
1279 : */
1280 : static int
1281 2 : uniqueLongPos(WordEntryPos *pos, int npos)
1282 : {
1283 : WordEntryPos *pos_iter,
1284 : *result;
1285 :
1286 2 : if (npos <= 1)
1287 1 : return npos;
1288 :
1289 1 : qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1290 :
1291 1 : result = pos;
1292 1 : pos_iter = pos + 1;
1293 3 : while (pos_iter < pos + npos)
1294 : {
1295 1 : if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
1296 : {
1297 1 : result++;
1298 1 : *result = WEP_GETPOS(*pos_iter);
1299 : }
1300 :
1301 1 : pos_iter++;
1302 : }
1303 :
1304 1 : return result + 1 - pos;
1305 : }
1306 :
1307 : /*
1308 : * is there value 'val' in array or not ?
1309 : */
1310 : static bool
1311 12486 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
1312 : {
1313 12486 : CHKVAL *chkval = (CHKVAL *) checkval;
1314 12486 : WordEntry *StopLow = chkval->arrb;
1315 12486 : WordEntry *StopHigh = chkval->arre;
1316 12486 : WordEntry *StopMiddle = StopHigh;
1317 12486 : int difference = -1;
1318 12486 : bool res = false;
1319 :
1320 : /* Loop invariant: StopLow <= val < StopHigh */
1321 90832 : while (StopLow < StopHigh)
1322 : {
1323 68028 : StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1324 204084 : difference = tsCompareString(chkval->operand + val->distance,
1325 68028 : val->length,
1326 68028 : chkval->values + StopMiddle->pos,
1327 68028 : StopMiddle->len,
1328 : false);
1329 :
1330 68028 : if (difference == 0)
1331 : {
1332 : /* Check weight info & fill 'data' with positions */
1333 2168 : res = checkclass_str(chkval, StopMiddle, val, data);
1334 2168 : break;
1335 : }
1336 65860 : else if (difference > 0)
1337 35226 : StopLow = StopMiddle + 1;
1338 : else
1339 30634 : StopHigh = StopMiddle;
1340 : }
1341 :
1342 12486 : if ((!res || data) && val->prefix)
1343 : {
1344 1652 : WordEntryPos *allpos = NULL;
1345 1652 : int npos = 0,
1346 1652 : totalpos = 0;
1347 :
1348 : /*
1349 : * there was a failed exact search, so we should scan further to find
1350 : * a prefix match. We also need to do so if caller needs position info
1351 : */
1352 1652 : if (StopLow >= StopHigh)
1353 1650 : StopMiddle = StopHigh;
1354 :
1355 6395 : while ((!res || data) && StopMiddle < chkval->arre &&
1356 4776 : tsCompareString(chkval->operand + val->distance,
1357 1592 : val->length,
1358 1592 : chkval->values + StopMiddle->pos,
1359 1592 : StopMiddle->len,
1360 : true) == 0)
1361 : {
1362 1499 : if (data)
1363 : {
1364 : /*
1365 : * We need to join position information
1366 : */
1367 3 : res = checkclass_str(chkval, StopMiddle, val, data);
1368 :
1369 3 : if (res)
1370 : {
1371 8 : while (npos + data->npos >= totalpos)
1372 : {
1373 2 : if (totalpos == 0)
1374 : {
1375 2 : totalpos = 256;
1376 2 : allpos = palloc(sizeof(WordEntryPos) * totalpos);
1377 : }
1378 : else
1379 : {
1380 0 : totalpos *= 2;
1381 0 : allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1382 : }
1383 : }
1384 :
1385 3 : memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1386 3 : npos += data->npos;
1387 : }
1388 : }
1389 : else
1390 : {
1391 1496 : res = checkclass_str(chkval, StopMiddle, val, NULL);
1392 : }
1393 :
1394 1499 : StopMiddle++;
1395 : }
1396 :
1397 1652 : if (res && data)
1398 : {
1399 : /* Sort and make unique array of found positions */
1400 2 : data->pos = allpos;
1401 2 : data->npos = uniqueLongPos(allpos, npos);
1402 2 : data->allocated = true;
1403 : }
1404 : }
1405 :
1406 12486 : return res;
1407 : }
1408 :
1409 : /*
1410 : * Compute output position list for a tsquery operator in phrase mode.
1411 : *
1412 : * Merge the position lists in Ldata and Rdata as specified by "emit",
1413 : * returning the result list into *data. The input position lists must be
1414 : * sorted and unique, and the output will be as well.
1415 : *
1416 : * data: pointer to initially-all-zeroes output struct, or NULL
1417 : * Ldata, Rdata: input position lists
1418 : * emit: bitmask of TSPO_XXX flags
1419 : * Loffset: offset to be added to Ldata positions before comparing/outputting
1420 : * Roffset: offset to be added to Rdata positions before comparing/outputting
1421 : * max_npos: maximum possible required size of output position array
1422 : *
1423 : * Loffset and Roffset should not be negative, else we risk trying to output
1424 : * negative positions, which won't fit into WordEntryPos.
1425 : *
1426 : * Returns true if any positions were emitted to *data; or if data is NULL,
1427 : * returns true if any positions would have been emitted.
1428 : */
1429 : #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1430 : #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1431 : #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1432 :
1433 : static bool
1434 107 : TS_phrase_output(ExecPhraseData *data,
1435 : ExecPhraseData *Ldata,
1436 : ExecPhraseData *Rdata,
1437 : int emit,
1438 : int Loffset,
1439 : int Roffset,
1440 : int max_npos)
1441 : {
1442 : int Lindex,
1443 : Rindex;
1444 :
1445 : /* Loop until both inputs are exhausted */
1446 107 : Lindex = Rindex = 0;
1447 287 : while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1448 : {
1449 : int Lpos,
1450 : Rpos;
1451 143 : int output_pos = 0;
1452 :
1453 : /*
1454 : * Fetch current values to compare. WEP_GETPOS() is needed because
1455 : * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1456 : */
1457 143 : if (Lindex < Ldata->npos)
1458 121 : Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1459 : else
1460 : {
1461 : /* L array exhausted, so we're done if R_ONLY isn't set */
1462 22 : if (!(emit & TSPO_R_ONLY))
1463 10 : break;
1464 12 : Lpos = INT_MAX;
1465 : }
1466 133 : if (Rindex < Rdata->npos)
1467 109 : Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1468 : else
1469 : {
1470 : /* R array exhausted, so we're done if L_ONLY isn't set */
1471 24 : if (!(emit & TSPO_L_ONLY))
1472 11 : break;
1473 13 : Rpos = INT_MAX;
1474 : }
1475 :
1476 : /* Merge-join the two input lists */
1477 122 : if (Lpos < Rpos)
1478 : {
1479 : /* Lpos is not matched in Rdata, should we output it? */
1480 33 : if (emit & TSPO_L_ONLY)
1481 17 : output_pos = Lpos;
1482 33 : Lindex++;
1483 : }
1484 89 : else if (Lpos == Rpos)
1485 : {
1486 : /* Lpos and Rpos match ... should we output it? */
1487 64 : if (emit & TSPO_BOTH)
1488 62 : output_pos = Rpos;
1489 64 : Lindex++;
1490 64 : Rindex++;
1491 : }
1492 : else /* Lpos > Rpos */
1493 : {
1494 : /* Rpos is not matched in Ldata, should we output it? */
1495 25 : if (emit & TSPO_R_ONLY)
1496 12 : output_pos = Rpos;
1497 25 : Rindex++;
1498 : }
1499 :
1500 122 : if (output_pos > 0)
1501 : {
1502 91 : if (data)
1503 : {
1504 : /* Store position, first allocating output array if needed */
1505 42 : if (data->pos == NULL)
1506 : {
1507 34 : data->pos = (WordEntryPos *)
1508 34 : palloc(max_npos * sizeof(WordEntryPos));
1509 34 : data->allocated = true;
1510 : }
1511 42 : data->pos[data->npos++] = output_pos;
1512 : }
1513 : else
1514 : {
1515 : /*
1516 : * Exact positions not needed, so return true as soon as we
1517 : * know there is at least one.
1518 : */
1519 49 : return true;
1520 : }
1521 : }
1522 : }
1523 :
1524 58 : if (data && data->npos > 0)
1525 : {
1526 : /* Let's assert we didn't overrun the array */
1527 34 : Assert(data->npos <= max_npos);
1528 34 : return true;
1529 : }
1530 24 : return false;
1531 : }
1532 :
1533 : /*
1534 : * Execute tsquery at or below an OP_PHRASE operator.
1535 : *
1536 : * This handles tsquery execution at recursion levels where we need to care
1537 : * about match locations.
1538 : *
1539 : * In addition to the same arguments used for TS_execute, the caller may pass
1540 : * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1541 : * match position info on success. data == NULL if no position data need be
1542 : * returned. (In practice, outside callers pass NULL, and only the internal
1543 : * recursion cases pass a data pointer.)
1544 : * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1545 : * This is OK because an outside call always starts from an OP_PHRASE node.
1546 : *
1547 : * The detailed semantics of the match data, given that the function returned
1548 : * "true" (successful match, or possible match), are:
1549 : *
1550 : * npos > 0, negate = false:
1551 : * query is matched at specified position(s) (and only those positions)
1552 : * npos > 0, negate = true:
1553 : * query is matched at all positions *except* specified position(s)
1554 : * npos = 0, negate = false:
1555 : * query is possibly matched, matching position(s) are unknown
1556 : * (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set)
1557 : * npos = 0, negate = true:
1558 : * query is matched at all positions
1559 : *
1560 : * Successful matches also return a "width" value which is the match width in
1561 : * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1562 : * and is the sum of the phrase operator distances for phrase matches. Note
1563 : * that when width > 0, the listed positions represent the ends of matches not
1564 : * the starts. (This unintuitive rule is needed to avoid possibly generating
1565 : * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1566 : *
1567 : * When the function returns "false" (no match), it must return npos = 0,
1568 : * negate = false (which is the state initialized by the caller); but the
1569 : * "width" output in such cases is undefined.
1570 : */
1571 : static bool
1572 431 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1573 : TSExecuteCallback chkcond,
1574 : ExecPhraseData *data)
1575 : {
1576 : ExecPhraseData Ldata,
1577 : Rdata;
1578 : bool lmatch,
1579 : rmatch;
1580 : int Loffset,
1581 : Roffset,
1582 : maxwidth;
1583 :
1584 : /* since this function recurses, it could be driven to stack overflow */
1585 431 : check_stack_depth();
1586 :
1587 431 : if (curitem->type == QI_VAL)
1588 251 : return chkcond(arg, (QueryOperand *) curitem, data);
1589 :
1590 180 : switch (curitem->qoperator.oper)
1591 : {
1592 : case OP_NOT:
1593 :
1594 : /*
1595 : * Because a "true" result with no specific positions is taken as
1596 : * uncertain, we need no special care here for !TS_EXEC_CALC_NOT.
1597 : * If it's a false positive, the right things happen anyway.
1598 : *
1599 : * Also, we need not touch data->width, since a NOT operation does
1600 : * not change the match width.
1601 : */
1602 6 : if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1603 : {
1604 3 : if (data->npos > 0)
1605 : {
1606 : /* we have some positions, invert negate flag */
1607 3 : data->negate = !data->negate;
1608 3 : return true;
1609 : }
1610 0 : else if (data->negate)
1611 : {
1612 : /* change "match everywhere" to "match nowhere" */
1613 0 : data->negate = false;
1614 0 : return false;
1615 : }
1616 : /* match positions are, and remain, uncertain */
1617 0 : return true;
1618 : }
1619 : else
1620 : {
1621 : /* change "match nowhere" to "match everywhere" */
1622 3 : Assert(data->npos == 0 && !data->negate);
1623 3 : data->negate = true;
1624 3 : return true;
1625 : }
1626 :
1627 : case OP_PHRASE:
1628 : case OP_AND:
1629 152 : memset(&Ldata, 0, sizeof(Ldata));
1630 152 : memset(&Rdata, 0, sizeof(Rdata));
1631 :
1632 152 : if (!TS_phrase_execute(curitem + curitem->qoperator.left,
1633 : arg, flags, chkcond, &Ldata))
1634 39 : return false;
1635 :
1636 113 : if (!TS_phrase_execute(curitem + 1,
1637 : arg, flags, chkcond, &Rdata))
1638 24 : return false;
1639 :
1640 : /*
1641 : * If either operand has no position information, then we can't
1642 : * return position data, only a "possible match" result. "Possible
1643 : * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
1644 : * is set, otherwise return false.
1645 : */
1646 176 : if ((Ldata.npos == 0 && !Ldata.negate) ||
1647 88 : (Rdata.npos == 0 && !Rdata.negate))
1648 2 : return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
1649 :
1650 87 : if (curitem->qoperator.oper == OP_PHRASE)
1651 : {
1652 : /*
1653 : * Compute Loffset and Roffset suitable for phrase match, and
1654 : * compute overall width of whole phrase match.
1655 : */
1656 86 : Loffset = curitem->qoperator.distance + Rdata.width;
1657 86 : Roffset = 0;
1658 86 : if (data)
1659 48 : data->width = curitem->qoperator.distance +
1660 32 : Ldata.width + Rdata.width;
1661 : }
1662 : else
1663 : {
1664 : /*
1665 : * For OP_AND, set output width and alignment like OP_OR (see
1666 : * comment below)
1667 : */
1668 1 : maxwidth = Max(Ldata.width, Rdata.width);
1669 1 : Loffset = maxwidth - Ldata.width;
1670 1 : Roffset = maxwidth - Rdata.width;
1671 1 : if (data)
1672 1 : data->width = maxwidth;
1673 : }
1674 :
1675 87 : if (Ldata.negate && Rdata.negate)
1676 : {
1677 : /* !L & !R: treat as !(L | R) */
1678 0 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1679 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1680 : Loffset, Roffset,
1681 0 : Ldata.npos + Rdata.npos);
1682 0 : if (data)
1683 0 : data->negate = true;
1684 0 : return true;
1685 : }
1686 87 : else if (Ldata.negate)
1687 : {
1688 : /* !L & R */
1689 5 : return TS_phrase_output(data, &Ldata, &Rdata,
1690 : TSPO_R_ONLY,
1691 : Loffset, Roffset,
1692 : Rdata.npos);
1693 : }
1694 82 : else if (Rdata.negate)
1695 : {
1696 : /* L & !R */
1697 1 : return TS_phrase_output(data, &Ldata, &Rdata,
1698 : TSPO_L_ONLY,
1699 : Loffset, Roffset,
1700 : Ldata.npos);
1701 : }
1702 : else
1703 : {
1704 : /* straight AND */
1705 81 : return TS_phrase_output(data, &Ldata, &Rdata,
1706 : TSPO_BOTH,
1707 : Loffset, Roffset,
1708 81 : Min(Ldata.npos, Rdata.npos));
1709 : }
1710 :
1711 : case OP_OR:
1712 22 : memset(&Ldata, 0, sizeof(Ldata));
1713 22 : memset(&Rdata, 0, sizeof(Rdata));
1714 :
1715 22 : lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1716 : arg, flags, chkcond, &Ldata);
1717 22 : rmatch = TS_phrase_execute(curitem + 1,
1718 : arg, flags, chkcond, &Rdata);
1719 :
1720 22 : if (!lmatch && !rmatch)
1721 2 : return false;
1722 :
1723 : /*
1724 : * If a valid operand has no position information, then we can't
1725 : * return position data, only a "possible match" result. "Possible
1726 : * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
1727 : * is set, otherwise return false.
1728 : */
1729 20 : if ((lmatch && Ldata.npos == 0 && !Ldata.negate) ||
1730 7 : (rmatch && Rdata.npos == 0 && !Rdata.negate))
1731 0 : return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
1732 :
1733 : /*
1734 : * Cope with undefined output width from failed submatch. (This
1735 : * takes less code than trying to ensure that all failure returns
1736 : * set data->width to zero.)
1737 : */
1738 20 : if (!lmatch)
1739 3 : Ldata.width = 0;
1740 20 : if (!rmatch)
1741 13 : Rdata.width = 0;
1742 :
1743 : /*
1744 : * For OP_AND and OP_OR, report the width of the wider of the two
1745 : * inputs, and align the narrower input's positions to the right
1746 : * end of that width. This rule deals at least somewhat
1747 : * reasonably with cases like "x <-> (y | z <-> q)".
1748 : */
1749 20 : maxwidth = Max(Ldata.width, Rdata.width);
1750 20 : Loffset = maxwidth - Ldata.width;
1751 20 : Roffset = maxwidth - Rdata.width;
1752 20 : data->width = maxwidth;
1753 :
1754 20 : if (Ldata.negate && Rdata.negate)
1755 : {
1756 : /* !L | !R: treat as !(L & R) */
1757 0 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1758 : TSPO_BOTH,
1759 : Loffset, Roffset,
1760 0 : Min(Ldata.npos, Rdata.npos));
1761 0 : data->negate = true;
1762 0 : return true;
1763 : }
1764 20 : else if (Ldata.negate)
1765 : {
1766 : /* !L | R: treat as !(L & !R) */
1767 4 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1768 : TSPO_L_ONLY,
1769 : Loffset, Roffset,
1770 : Ldata.npos);
1771 4 : data->negate = true;
1772 4 : return true;
1773 : }
1774 16 : else if (Rdata.negate)
1775 : {
1776 : /* L | !R: treat as !(!L & R) */
1777 0 : (void) TS_phrase_output(data, &Ldata, &Rdata,
1778 : TSPO_R_ONLY,
1779 : Loffset, Roffset,
1780 : Rdata.npos);
1781 0 : data->negate = true;
1782 0 : return true;
1783 : }
1784 : else
1785 : {
1786 : /* straight OR */
1787 16 : return TS_phrase_output(data, &Ldata, &Rdata,
1788 : TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1789 : Loffset, Roffset,
1790 16 : Ldata.npos + Rdata.npos);
1791 : }
1792 :
1793 : default:
1794 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1795 : }
1796 :
1797 : /* not reachable, but keep compiler quiet */
1798 : return false;
1799 : }
1800 :
1801 :
1802 : /*
1803 : * Evaluate tsquery boolean expression.
1804 : *
1805 : * curitem: current tsquery item (initially, the first one)
1806 : * arg: opaque value to pass through to callback function
1807 : * flags: bitmask of flag bits shown in ts_utils.h
1808 : * chkcond: callback function to check whether a primitive value is present
1809 : *
1810 : * The logic here deals only with operators above any phrase operator, for
1811 : * which we do not need to worry about lexeme positions. As soon as we hit an
1812 : * OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
1813 : */
1814 : bool
1815 48005 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1816 : TSExecuteCallback chkcond)
1817 : {
1818 : /* since this function recurses, it could be driven to stack overflow */
1819 48005 : check_stack_depth();
1820 :
1821 48005 : if (curitem->type == QI_VAL)
1822 27352 : return chkcond(arg, (QueryOperand *) curitem,
1823 : NULL /* we don't need position info */ );
1824 :
1825 20653 : switch (curitem->qoperator.oper)
1826 : {
1827 : case OP_NOT:
1828 2580 : if (flags & TS_EXEC_CALC_NOT)
1829 2542 : return !TS_execute(curitem + 1, arg, flags, chkcond);
1830 : else
1831 38 : return true;
1832 :
1833 : case OP_AND:
1834 8057 : if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
1835 1484 : return TS_execute(curitem + 1, arg, flags, chkcond);
1836 : else
1837 6573 : return false;
1838 :
1839 : case OP_OR:
1840 9900 : if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
1841 3697 : return true;
1842 : else
1843 6203 : return TS_execute(curitem + 1, arg, flags, chkcond);
1844 :
1845 : case OP_PHRASE:
1846 116 : return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
1847 :
1848 : default:
1849 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1850 : }
1851 :
1852 : /* not reachable, but keep compiler quiet */
1853 : return false;
1854 : }
1855 :
1856 : /*
1857 : * Detect whether a tsquery boolean expression requires any positive matches
1858 : * to values shown in the tsquery.
1859 : *
1860 : * This is needed to know whether a GIN index search requires full index scan.
1861 : * For example, 'x & !y' requires a match of x, so it's sufficient to scan
1862 : * entries for x; but 'x | !y' could match rows containing neither x nor y.
1863 : */
1864 : bool
1865 80 : tsquery_requires_match(QueryItem *curitem)
1866 : {
1867 : /* since this function recurses, it could be driven to stack overflow */
1868 80 : check_stack_depth();
1869 :
1870 80 : if (curitem->type == QI_VAL)
1871 47 : return true;
1872 :
1873 33 : switch (curitem->qoperator.oper)
1874 : {
1875 : case OP_NOT:
1876 :
1877 : /*
1878 : * Assume there are no required matches underneath a NOT. For
1879 : * some cases with nested NOTs, we could prove there's a required
1880 : * match, but it seems unlikely to be worth the trouble.
1881 : */
1882 2 : return false;
1883 :
1884 : case OP_PHRASE:
1885 :
1886 : /*
1887 : * Treat OP_PHRASE as OP_AND here
1888 : */
1889 : case OP_AND:
1890 : /* If either side requires a match, we're good */
1891 20 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
1892 20 : return true;
1893 : else
1894 0 : return tsquery_requires_match(curitem + 1);
1895 :
1896 : case OP_OR:
1897 : /* Both sides must require a match */
1898 11 : if (tsquery_requires_match(curitem + curitem->qoperator.left))
1899 11 : return tsquery_requires_match(curitem + 1);
1900 : else
1901 0 : return false;
1902 :
1903 : default:
1904 0 : elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1905 : }
1906 :
1907 : /* not reachable, but keep compiler quiet */
1908 : return false;
1909 : }
1910 :
1911 : /*
1912 : * boolean operations
1913 : */
1914 : Datum
1915 10 : ts_match_qv(PG_FUNCTION_ARGS)
1916 : {
1917 10 : PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
1918 : PG_GETARG_DATUM(1),
1919 : PG_GETARG_DATUM(0)));
1920 : }
1921 :
1922 : Datum
1923 9479 : ts_match_vq(PG_FUNCTION_ARGS)
1924 : {
1925 9479 : TSVector val = PG_GETARG_TSVECTOR(0);
1926 9479 : TSQuery query = PG_GETARG_TSQUERY(1);
1927 : CHKVAL chkval;
1928 : bool result;
1929 :
1930 : /* empty query matches nothing */
1931 9479 : if (!query->size)
1932 : {
1933 0 : PG_FREE_IF_COPY(val, 0);
1934 0 : PG_FREE_IF_COPY(query, 1);
1935 0 : PG_RETURN_BOOL(false);
1936 : }
1937 :
1938 9479 : chkval.arrb = ARRPTR(val);
1939 9479 : chkval.arre = chkval.arrb + val->size;
1940 9479 : chkval.values = STRPTR(val);
1941 9479 : chkval.operand = GETOPERAND(query);
1942 9479 : result = TS_execute(GETQUERY(query),
1943 : &chkval,
1944 : TS_EXEC_CALC_NOT,
1945 : checkcondition_str);
1946 :
1947 9479 : PG_FREE_IF_COPY(val, 0);
1948 9479 : PG_FREE_IF_COPY(query, 1);
1949 9479 : PG_RETURN_BOOL(result);
1950 : }
1951 :
1952 : Datum
1953 0 : ts_match_tt(PG_FUNCTION_ARGS)
1954 : {
1955 : TSVector vector;
1956 : TSQuery query;
1957 : bool res;
1958 :
1959 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
1960 : PG_GETARG_DATUM(0)));
1961 0 : query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
1962 : PG_GETARG_DATUM(1)));
1963 :
1964 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
1965 : TSVectorGetDatum(vector),
1966 : TSQueryGetDatum(query)));
1967 :
1968 0 : pfree(vector);
1969 0 : pfree(query);
1970 :
1971 0 : PG_RETURN_BOOL(res);
1972 : }
1973 :
1974 : Datum
1975 0 : ts_match_tq(PG_FUNCTION_ARGS)
1976 : {
1977 : TSVector vector;
1978 0 : TSQuery query = PG_GETARG_TSQUERY(1);
1979 : bool res;
1980 :
1981 0 : vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
1982 : PG_GETARG_DATUM(0)));
1983 :
1984 0 : res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
1985 : TSVectorGetDatum(vector),
1986 : TSQueryGetDatum(query)));
1987 :
1988 0 : pfree(vector);
1989 0 : PG_FREE_IF_COPY(query, 1);
1990 :
1991 0 : PG_RETURN_BOOL(res);
1992 : }
1993 :
1994 : /*
1995 : * ts_stat statistic function support
1996 : */
1997 :
1998 :
1999 : /*
2000 : * Returns the number of positions in value 'wptr' within tsvector 'txt',
2001 : * that have a weight equal to one of the weights in 'weight' bitmask.
2002 : */
2003 : static int
2004 1 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
2005 : {
2006 1 : int len = POSDATALEN(txt, wptr);
2007 1 : int num = 0;
2008 1 : WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2009 :
2010 6 : while (len--)
2011 : {
2012 4 : if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2013 2 : num++;
2014 4 : ptr++;
2015 : }
2016 1 : return num;
2017 : }
2018 :
2019 : #define compareStatWord(a,e,t) \
2020 : tsCompareString((a)->lexeme, (a)->lenlexeme, \
2021 : STRPTR(t) + (e)->pos, (e)->len, \
2022 : false)
2023 :
2024 : static void
2025 57638 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2026 : {
2027 57638 : WordEntry *we = ARRPTR(txt) + off;
2028 57638 : StatEntry *node = stat->root,
2029 57638 : *pnode = NULL;
2030 : int n,
2031 57638 : res = 0;
2032 57638 : uint32 depth = 1;
2033 :
2034 57638 : if (stat->weight == 0)
2035 28819 : n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2036 : else
2037 28819 : n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2038 :
2039 57638 : if (n == 0)
2040 86456 : return; /* nothing to insert */
2041 :
2042 319871 : while (node)
2043 : {
2044 289907 : res = compareStatWord(node, we, txt);
2045 :
2046 289907 : if (res == 0)
2047 : {
2048 27676 : break;
2049 : }
2050 : else
2051 : {
2052 262231 : pnode = node;
2053 262231 : node = (res < 0) ? node->left : node->right;
2054 : }
2055 262231 : depth++;
2056 : }
2057 :
2058 28820 : if (depth > stat->maxdepth)
2059 21 : stat->maxdepth = depth;
2060 :
2061 28820 : if (node == NULL)
2062 : {
2063 1144 : node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2064 1144 : node->left = node->right = NULL;
2065 1144 : node->ndoc = 1;
2066 1144 : node->nentry = n;
2067 1144 : node->lenlexeme = we->len;
2068 1144 : memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2069 :
2070 1144 : if (pnode == NULL)
2071 : {
2072 2 : stat->root = node;
2073 : }
2074 : else
2075 : {
2076 1142 : if (res < 0)
2077 564 : pnode->left = node;
2078 : else
2079 578 : pnode->right = node;
2080 : }
2081 :
2082 : }
2083 : else
2084 : {
2085 27676 : node->ndoc++;
2086 27676 : node->nentry += n;
2087 : }
2088 : }
2089 :
2090 : static void
2091 82692 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
2092 : uint32 low, uint32 high, uint32 offset)
2093 : {
2094 : uint32 pos;
2095 82692 : uint32 middle = (low + high) >> 1;
2096 :
2097 82692 : pos = (low + middle) >> 1;
2098 82692 : if (low != middle && pos >= offset && pos - offset < txt->size)
2099 28406 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2100 82692 : pos = (high + middle + 1) >> 1;
2101 82692 : if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2102 28230 : insertStatEntry(persistentContext, stat, txt, pos - offset);
2103 :
2104 82692 : if (low != middle)
2105 41346 : chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2106 82692 : if (high != middle + 1)
2107 40344 : chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2108 82692 : }
2109 :
2110 : /*
2111 : * This is written like a custom aggregate function, because the
2112 : * original plan was to do just that. Unfortunately, an aggregate function
2113 : * can't return a set, so that plan was abandoned. If that limitation is
2114 : * lifted in the future, ts_stat could be a real aggregate function so that
2115 : * you could use it like this:
2116 : *
2117 : * SELECT ts_stat(vector_column) FROM vector_table;
2118 : *
2119 : * where vector_column is a tsvector-type column in vector_table.
2120 : */
2121 :
2122 : static TSVectorStat *
2123 1018 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2124 : {
2125 1018 : TSVector txt = DatumGetTSVector(data);
2126 : uint32 i,
2127 1018 : nbit = 0,
2128 : offset;
2129 :
2130 1018 : if (stat == NULL)
2131 : { /* Init in first */
2132 0 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2133 0 : stat->maxdepth = 1;
2134 : }
2135 :
2136 : /* simple check of correctness */
2137 1018 : if (txt == NULL || txt->size == 0)
2138 : {
2139 16 : if (txt && txt != (TSVector) DatumGetPointer(data))
2140 16 : pfree(txt);
2141 16 : return stat;
2142 : }
2143 :
2144 1002 : i = txt->size - 1;
2145 7122 : for (; i > 0; i >>= 1)
2146 6120 : nbit++;
2147 :
2148 1002 : nbit = 1 << nbit;
2149 1002 : offset = (nbit - txt->size) / 2;
2150 :
2151 1002 : insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2152 1002 : chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2153 :
2154 1002 : return stat;
2155 : }
2156 :
2157 : static void
2158 2 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
2159 : TSVectorStat *stat)
2160 : {
2161 : TupleDesc tupdesc;
2162 : MemoryContext oldcontext;
2163 : StatEntry *node;
2164 :
2165 2 : funcctx->user_fctx = (void *) stat;
2166 :
2167 2 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2168 :
2169 2 : stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2170 2 : stat->stackpos = 0;
2171 :
2172 2 : node = stat->root;
2173 : /* find leftmost value */
2174 2 : if (node == NULL)
2175 0 : stat->stack[stat->stackpos] = NULL;
2176 : else
2177 : for (;;)
2178 : {
2179 8 : stat->stack[stat->stackpos] = node;
2180 8 : if (node->left)
2181 : {
2182 6 : stat->stackpos++;
2183 6 : node = node->left;
2184 : }
2185 : else
2186 2 : break;
2187 6 : }
2188 2 : Assert(stat->stackpos <= stat->maxdepth);
2189 :
2190 2 : tupdesc = CreateTemplateTupleDesc(3, false);
2191 2 : TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
2192 : TEXTOID, -1, 0);
2193 2 : TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
2194 : INT4OID, -1, 0);
2195 2 : TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
2196 : INT4OID, -1, 0);
2197 2 : funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2198 2 : funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2199 :
2200 2 : MemoryContextSwitchTo(oldcontext);
2201 2 : }
2202 :
2203 : static StatEntry *
2204 2288 : walkStatEntryTree(TSVectorStat *stat)
2205 : {
2206 2288 : StatEntry *node = stat->stack[stat->stackpos];
2207 :
2208 2288 : if (node == NULL)
2209 0 : return NULL;
2210 :
2211 2288 : if (node->ndoc != 0)
2212 : {
2213 : /* return entry itself: we already was at left sublink */
2214 566 : return node;
2215 : }
2216 1722 : else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2217 : {
2218 : /* go on right sublink */
2219 578 : stat->stackpos++;
2220 578 : node = node->right;
2221 :
2222 : /* find most-left value */
2223 : for (;;)
2224 : {
2225 1136 : stat->stack[stat->stackpos] = node;
2226 1136 : if (node->left)
2227 : {
2228 558 : stat->stackpos++;
2229 558 : node = node->left;
2230 : }
2231 : else
2232 578 : break;
2233 558 : }
2234 578 : Assert(stat->stackpos <= stat->maxdepth);
2235 : }
2236 : else
2237 : {
2238 : /* we already return all left subtree, itself and right subtree */
2239 1144 : if (stat->stackpos == 0)
2240 2 : return NULL;
2241 :
2242 1142 : stat->stackpos--;
2243 1142 : return walkStatEntryTree(stat);
2244 : }
2245 :
2246 578 : return node;
2247 : }
2248 :
2249 : static Datum
2250 1146 : ts_process_call(FuncCallContext *funcctx)
2251 : {
2252 : TSVectorStat *st;
2253 : StatEntry *entry;
2254 :
2255 1146 : st = (TSVectorStat *) funcctx->user_fctx;
2256 :
2257 1146 : entry = walkStatEntryTree(st);
2258 :
2259 1146 : if (entry != NULL)
2260 : {
2261 : Datum result;
2262 : char *values[3];
2263 : char ndoc[16];
2264 : char nentry[16];
2265 : HeapTuple tuple;
2266 :
2267 1144 : values[0] = palloc(entry->lenlexeme + 1);
2268 1144 : memcpy(values[0], entry->lexeme, entry->lenlexeme);
2269 1144 : (values[0])[entry->lenlexeme] = '\0';
2270 1144 : sprintf(ndoc, "%d", entry->ndoc);
2271 1144 : values[1] = ndoc;
2272 1144 : sprintf(nentry, "%d", entry->nentry);
2273 1144 : values[2] = nentry;
2274 :
2275 1144 : tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2276 1144 : result = HeapTupleGetDatum(tuple);
2277 :
2278 1144 : pfree(values[0]);
2279 :
2280 : /* mark entry as already visited */
2281 1144 : entry->ndoc = 0;
2282 :
2283 1144 : return result;
2284 : }
2285 :
2286 2 : return (Datum) 0;
2287 : }
2288 :
2289 : static TSVectorStat *
2290 2 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2291 : {
2292 2 : char *query = text_to_cstring(txt);
2293 : TSVectorStat *stat;
2294 : bool isnull;
2295 : Portal portal;
2296 : SPIPlanPtr plan;
2297 :
2298 2 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2299 : /* internal error */
2300 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2301 :
2302 2 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2303 : /* internal error */
2304 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2305 :
2306 2 : SPI_cursor_fetch(portal, true, 100);
2307 :
2308 4 : if (SPI_tuptable == NULL ||
2309 4 : SPI_tuptable->tupdesc->natts != 1 ||
2310 2 : !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
2311 : TSVECTOROID))
2312 0 : ereport(ERROR,
2313 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2314 : errmsg("ts_stat query must return one tsvector column")));
2315 :
2316 2 : stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2317 2 : stat->maxdepth = 1;
2318 :
2319 2 : if (ws)
2320 : {
2321 : char *buf;
2322 :
2323 1 : buf = VARDATA_ANY(ws);
2324 4 : while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2325 : {
2326 2 : if (pg_mblen(buf) == 1)
2327 : {
2328 2 : switch (*buf)
2329 : {
2330 : case 'A':
2331 : case 'a':
2332 1 : stat->weight |= 1 << 3;
2333 1 : break;
2334 : case 'B':
2335 : case 'b':
2336 1 : stat->weight |= 1 << 2;
2337 1 : break;
2338 : case 'C':
2339 : case 'c':
2340 0 : stat->weight |= 1 << 1;
2341 0 : break;
2342 : case 'D':
2343 : case 'd':
2344 0 : stat->weight |= 1;
2345 0 : break;
2346 : default:
2347 0 : stat->weight |= 0;
2348 : }
2349 : }
2350 2 : buf += pg_mblen(buf);
2351 : }
2352 : }
2353 :
2354 16 : while (SPI_processed > 0)
2355 : {
2356 : uint64 i;
2357 :
2358 1030 : for (i = 0; i < SPI_processed; i++)
2359 : {
2360 1018 : Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2361 :
2362 1018 : if (!isnull)
2363 1018 : stat = ts_accum(persistentContext, stat, data);
2364 : }
2365 :
2366 12 : SPI_freetuptable(SPI_tuptable);
2367 12 : SPI_cursor_fetch(portal, true, 100);
2368 : }
2369 :
2370 2 : SPI_freetuptable(SPI_tuptable);
2371 2 : SPI_cursor_close(portal);
2372 2 : SPI_freeplan(plan);
2373 2 : pfree(query);
2374 :
2375 2 : return stat;
2376 : }
2377 :
2378 : Datum
2379 1144 : ts_stat1(PG_FUNCTION_ARGS)
2380 : {
2381 : FuncCallContext *funcctx;
2382 : Datum result;
2383 :
2384 1144 : if (SRF_IS_FIRSTCALL())
2385 : {
2386 : TSVectorStat *stat;
2387 1 : text *txt = PG_GETARG_TEXT_PP(0);
2388 :
2389 1 : funcctx = SRF_FIRSTCALL_INIT();
2390 1 : SPI_connect();
2391 1 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2392 1 : PG_FREE_IF_COPY(txt, 0);
2393 1 : ts_setup_firstcall(fcinfo, funcctx, stat);
2394 1 : SPI_finish();
2395 : }
2396 :
2397 1144 : funcctx = SRF_PERCALL_SETUP();
2398 1144 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2399 1143 : SRF_RETURN_NEXT(funcctx, result);
2400 1 : SRF_RETURN_DONE(funcctx);
2401 : }
2402 :
2403 : Datum
2404 2 : ts_stat2(PG_FUNCTION_ARGS)
2405 : {
2406 : FuncCallContext *funcctx;
2407 : Datum result;
2408 :
2409 2 : if (SRF_IS_FIRSTCALL())
2410 : {
2411 : TSVectorStat *stat;
2412 1 : text *txt = PG_GETARG_TEXT_PP(0);
2413 1 : text *ws = PG_GETARG_TEXT_PP(1);
2414 :
2415 1 : funcctx = SRF_FIRSTCALL_INIT();
2416 1 : SPI_connect();
2417 1 : stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2418 1 : PG_FREE_IF_COPY(txt, 0);
2419 1 : PG_FREE_IF_COPY(ws, 1);
2420 1 : ts_setup_firstcall(fcinfo, funcctx, stat);
2421 1 : SPI_finish();
2422 : }
2423 :
2424 2 : funcctx = SRF_PERCALL_SETUP();
2425 2 : if ((result = ts_process_call(funcctx)) != (Datum) 0)
2426 1 : SRF_RETURN_NEXT(funcctx, result);
2427 1 : SRF_RETURN_DONE(funcctx);
2428 : }
2429 :
2430 :
2431 : /*
2432 : * Triggers for automatic update of a tsvector column from text column(s)
2433 : *
2434 : * Trigger arguments are either
2435 : * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2436 : * name of tsvector col, name of regconfig col, name(s) of text col(s)
2437 : * ie, tsconfig can either be specified by name, or indirectly as the
2438 : * contents of a regconfig field in the row. If the name is used, it must
2439 : * be explicitly schema-qualified.
2440 : */
2441 : Datum
2442 3 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
2443 : {
2444 3 : return tsvector_update_trigger(fcinfo, false);
2445 : }
2446 :
2447 : Datum
2448 0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
2449 : {
2450 0 : return tsvector_update_trigger(fcinfo, true);
2451 : }
2452 :
2453 : static Datum
2454 3 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
2455 : {
2456 : TriggerData *trigdata;
2457 : Trigger *trigger;
2458 : Relation rel;
2459 3 : HeapTuple rettuple = NULL;
2460 : int tsvector_attr_num,
2461 : i;
2462 : ParsedText prs;
2463 : Datum datum;
2464 : bool isnull;
2465 : text *txt;
2466 : Oid cfgId;
2467 :
2468 : /* Check call context */
2469 3 : if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2470 0 : elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2471 :
2472 3 : trigdata = (TriggerData *) fcinfo->context;
2473 3 : if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2474 0 : elog(ERROR, "tsvector_update_trigger: must be fired for row");
2475 3 : if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2476 0 : elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2477 :
2478 3 : if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2479 2 : rettuple = trigdata->tg_trigtuple;
2480 1 : else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2481 1 : rettuple = trigdata->tg_newtuple;
2482 : else
2483 0 : elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2484 :
2485 3 : trigger = trigdata->tg_trigger;
2486 3 : rel = trigdata->tg_relation;
2487 :
2488 3 : if (trigger->tgnargs < 3)
2489 0 : elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2490 :
2491 : /* Find the target tsvector column */
2492 3 : tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2493 3 : if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2494 0 : ereport(ERROR,
2495 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2496 : errmsg("tsvector column \"%s\" does not exist",
2497 : trigger->tgargs[0])));
2498 : /* This will effectively reject system columns, so no separate test: */
2499 3 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2500 : TSVECTOROID))
2501 0 : ereport(ERROR,
2502 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2503 : errmsg("column \"%s\" is not of tsvector type",
2504 : trigger->tgargs[0])));
2505 :
2506 : /* Find the configuration to use */
2507 3 : if (config_column)
2508 : {
2509 : int config_attr_num;
2510 :
2511 0 : config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2512 0 : if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2513 0 : ereport(ERROR,
2514 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2515 : errmsg("configuration column \"%s\" does not exist",
2516 : trigger->tgargs[1])));
2517 0 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2518 : REGCONFIGOID))
2519 0 : ereport(ERROR,
2520 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2521 : errmsg("column \"%s\" is not of regconfig type",
2522 : trigger->tgargs[1])));
2523 :
2524 0 : datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2525 0 : if (isnull)
2526 0 : ereport(ERROR,
2527 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2528 : errmsg("configuration column \"%s\" must not be null",
2529 : trigger->tgargs[1])));
2530 0 : cfgId = DatumGetObjectId(datum);
2531 : }
2532 : else
2533 : {
2534 : List *names;
2535 :
2536 3 : names = stringToQualifiedNameList(trigger->tgargs[1]);
2537 : /* require a schema so that results are not search path dependent */
2538 3 : if (list_length(names) < 2)
2539 0 : ereport(ERROR,
2540 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2541 : errmsg("text search configuration name \"%s\" must be schema-qualified",
2542 : trigger->tgargs[1])));
2543 3 : cfgId = get_ts_config_oid(names, false);
2544 : }
2545 :
2546 : /* initialize parse state */
2547 3 : prs.lenwords = 32;
2548 3 : prs.curwords = 0;
2549 3 : prs.pos = 0;
2550 3 : prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2551 :
2552 : /* find all words in indexable column(s) */
2553 6 : for (i = 2; i < trigger->tgnargs; i++)
2554 : {
2555 : int numattr;
2556 :
2557 3 : numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2558 3 : if (numattr == SPI_ERROR_NOATTRIBUTE)
2559 0 : ereport(ERROR,
2560 : (errcode(ERRCODE_UNDEFINED_COLUMN),
2561 : errmsg("column \"%s\" does not exist",
2562 : trigger->tgargs[i])));
2563 3 : if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2564 0 : ereport(ERROR,
2565 : (errcode(ERRCODE_DATATYPE_MISMATCH),
2566 : errmsg("column \"%s\" is not of a character type",
2567 : trigger->tgargs[i])));
2568 :
2569 3 : datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2570 3 : if (isnull)
2571 1 : continue;
2572 :
2573 2 : txt = DatumGetTextPP(datum);
2574 :
2575 2 : parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2576 :
2577 2 : if (txt != (text *) DatumGetPointer(datum))
2578 0 : pfree(txt);
2579 : }
2580 :
2581 : /* make tsvector value */
2582 3 : datum = TSVectorGetDatum(make_tsvector(&prs));
2583 3 : isnull = false;
2584 :
2585 : /* and insert it into tuple */
2586 3 : rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2587 : 1, &tsvector_attr_num,
2588 : &datum, &isnull);
2589 :
2590 3 : pfree(DatumGetPointer(datum));
2591 :
2592 3 : return PointerGetDatum(rettuple);
2593 : }
|