Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * like.c
4 : * like expression handling code.
5 : *
6 : * NOTES
7 : * A big hack of the regexp.c code!! Contributed by
8 : * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9 : *
10 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/utils/adt/like.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include <ctype.h>
21 :
22 : #include "catalog/pg_collation.h"
23 : #include "mb/pg_wchar.h"
24 : #include "miscadmin.h"
25 : #include "utils/builtins.h"
26 : #include "utils/pg_locale.h"
27 :
28 :
29 : #define LIKE_TRUE 1
30 : #define LIKE_FALSE 0
31 : #define LIKE_ABORT (-1)
32 :
33 :
34 : static int SB_MatchText(char *t, int tlen, char *p, int plen,
35 : pg_locale_t locale, bool locale_is_c);
36 : static text *SB_do_like_escape(text *, text *);
37 :
38 : static int MB_MatchText(char *t, int tlen, char *p, int plen,
39 : pg_locale_t locale, bool locale_is_c);
40 : static text *MB_do_like_escape(text *, text *);
41 :
42 : static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
43 : pg_locale_t locale, bool locale_is_c);
44 :
45 : static int SB_IMatchText(char *t, int tlen, char *p, int plen,
46 : pg_locale_t locale, bool locale_is_c);
47 :
48 : static int GenericMatchText(char *s, int slen, char *p, int plen);
49 : static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
50 :
/*--------------------
 * wchareq: support routine for MatchText that compares the multibyte
 * characters starting at p1 and p2.
 *
 * Returns 1 when both characters have the same byte length (as reported by
 * pg_mblen) and consist of identical bytes; returns 0 otherwise.
 *--------------------
 */
static inline int
wchareq(char *p1, char *p2)
{
	int			nbytes;
	int			i;

	/* Cheap rejection: characters with different first bytes never match. */
	if (*p1 != *p2)
		return 0;

	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Both characters span nbytes bytes; compare them one byte at a time. */
	for (i = 0; i < nbytes; i++)
	{
		if (p1[i] != p2[i])
			return 0;
	}
	return 1;
}
77 :
78 : /*
79 : * Formerly we had a routine iwchareq() here that tried to do case-insensitive
80 : * comparison of multibyte characters. It did not work at all, however,
81 : * because it relied on tolower() which has a single-byte API ... and
82 : * towlower() wouldn't be much better since we have no suitably cheap way
83 : * of getting a single character transformed to the system's wchar_t format.
84 : * So now, we just downcase the strings using lower() and apply regular LIKE
85 : * comparison. This should be revisited when we install better locale support.
86 : */
87 :
88 : /*
89 : * We do handle case-insensitive matching for single-byte encodings using
90 : * fold-on-the-fly processing, however.
91 : */
92 : static char
93 0 : SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
94 : {
95 0 : if (locale_is_c)
96 0 : return pg_ascii_tolower(c);
97 : #ifdef HAVE_LOCALE_T
98 0 : else if (locale)
99 0 : return tolower_l(c, locale->info.lt);
100 : #endif
101 : else
102 0 : return pg_tolower(c);
103 : }
104 :
105 :
106 : #define NextByte(p, plen) ((p)++, (plen)--)
107 :
108 : /* Set up to compile like_match.c for multibyte characters */
109 : #define CHAREQ(p1, p2) wchareq((p1), (p2))
110 : #define NextChar(p, plen) \
111 : do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
112 : #define CopyAdvChar(dst, src, srclen) \
113 : do { int __l = pg_mblen(src); \
114 : (srclen) -= __l; \
115 : while (__l-- > 0) \
116 : *(dst)++ = *(src)++; \
117 : } while (0)
118 :
119 : #define MatchText MB_MatchText
120 : #define do_like_escape MB_do_like_escape
121 :
122 : #include "like_match.c"
123 :
124 : /* Set up to compile like_match.c for single-byte characters */
125 : #define CHAREQ(p1, p2) (*(p1) == *(p2))
126 : #define NextChar(p, plen) NextByte((p), (plen))
127 : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
128 :
129 : #define MatchText SB_MatchText
130 : #define do_like_escape SB_do_like_escape
131 :
132 : #include "like_match.c"
133 :
134 : /* setup to compile like_match.c for single byte case insensitive matches */
135 : #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
136 : #define NextChar(p, plen) NextByte((p), (plen))
137 : #define MatchText SB_IMatchText
138 :
139 : #include "like_match.c"
140 :
141 : /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
142 :
143 : #define NextChar(p, plen) \
144 : do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
145 : #define MatchText UTF8_MatchText
146 :
147 : #include "like_match.c"
148 :
149 : /* Generic for all cases not requiring inline case-folding */
150 : static inline int
151 48364 : GenericMatchText(char *s, int slen, char *p, int plen)
152 : {
153 48364 : if (pg_database_encoding_max_length() == 1)
154 0 : return SB_MatchText(s, slen, p, plen, 0, true);
155 48364 : else if (GetDatabaseEncoding() == PG_UTF8)
156 48364 : return UTF8_MatchText(s, slen, p, plen, 0, true);
157 : else
158 0 : return MB_MatchText(s, slen, p, plen, 0, true);
159 : }
160 :
161 : static inline int
162 12 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
163 : {
164 : char *s,
165 : *p;
166 : int slen,
167 : plen;
168 12 : pg_locale_t locale = 0;
169 12 : bool locale_is_c = false;
170 :
171 12 : if (lc_ctype_is_c(collation))
172 0 : locale_is_c = true;
173 12 : else if (collation != DEFAULT_COLLATION_OID)
174 : {
175 0 : if (!OidIsValid(collation))
176 : {
177 : /*
178 : * This typically means that the parser could not resolve a
179 : * conflict of implicit collations, so report it that way.
180 : */
181 0 : ereport(ERROR,
182 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
183 : errmsg("could not determine which collation to use for ILIKE"),
184 : errhint("Use the COLLATE clause to set the collation explicitly.")));
185 : }
186 0 : locale = pg_newlocale_from_collation(collation);
187 : }
188 :
189 : /*
190 : * For efficiency reasons, in the single byte case we don't call lower()
191 : * on the pattern and text, but instead call SB_lower_char on each
192 : * character. In the multi-byte case we don't have much choice :-(. Also,
193 : * ICU does not support single-character case folding, so we go the long
194 : * way.
195 : */
196 :
197 12 : if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
198 : {
199 : /* lower's result is never packed, so OK to use old macros here */
200 12 : pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
201 : PointerGetDatum(pat)));
202 12 : p = VARDATA_ANY(pat);
203 12 : plen = VARSIZE_ANY_EXHDR(pat);
204 12 : str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
205 : PointerGetDatum(str)));
206 12 : s = VARDATA_ANY(str);
207 12 : slen = VARSIZE_ANY_EXHDR(str);
208 12 : if (GetDatabaseEncoding() == PG_UTF8)
209 12 : return UTF8_MatchText(s, slen, p, plen, 0, true);
210 : else
211 0 : return MB_MatchText(s, slen, p, plen, 0, true);
212 : }
213 : else
214 : {
215 0 : p = VARDATA_ANY(pat);
216 0 : plen = VARSIZE_ANY_EXHDR(pat);
217 0 : s = VARDATA_ANY(str);
218 0 : slen = VARSIZE_ANY_EXHDR(str);
219 0 : return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
220 : }
221 : }
222 :
223 : /*
224 : * interface routines called by the function manager
225 : */
226 :
227 : Datum
228 17428 : namelike(PG_FUNCTION_ARGS)
229 : {
230 17428 : Name str = PG_GETARG_NAME(0);
231 17428 : text *pat = PG_GETARG_TEXT_PP(1);
232 : bool result;
233 : char *s,
234 : *p;
235 : int slen,
236 : plen;
237 :
238 17428 : s = NameStr(*str);
239 17428 : slen = strlen(s);
240 17428 : p = VARDATA_ANY(pat);
241 17428 : plen = VARSIZE_ANY_EXHDR(pat);
242 :
243 17428 : result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
244 :
245 17428 : PG_RETURN_BOOL(result);
246 : }
247 :
248 : Datum
249 598 : namenlike(PG_FUNCTION_ARGS)
250 : {
251 598 : Name str = PG_GETARG_NAME(0);
252 598 : text *pat = PG_GETARG_TEXT_PP(1);
253 : bool result;
254 : char *s,
255 : *p;
256 : int slen,
257 : plen;
258 :
259 598 : s = NameStr(*str);
260 598 : slen = strlen(s);
261 598 : p = VARDATA_ANY(pat);
262 598 : plen = VARSIZE_ANY_EXHDR(pat);
263 :
264 598 : result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
265 :
266 598 : PG_RETURN_BOOL(result);
267 : }
268 :
269 : Datum
270 12688 : textlike(PG_FUNCTION_ARGS)
271 : {
272 12688 : text *str = PG_GETARG_TEXT_PP(0);
273 12688 : text *pat = PG_GETARG_TEXT_PP(1);
274 : bool result;
275 : char *s,
276 : *p;
277 : int slen,
278 : plen;
279 :
280 12688 : s = VARDATA_ANY(str);
281 12688 : slen = VARSIZE_ANY_EXHDR(str);
282 12688 : p = VARDATA_ANY(pat);
283 12688 : plen = VARSIZE_ANY_EXHDR(pat);
284 :
285 12688 : result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
286 :
287 12688 : PG_RETURN_BOOL(result);
288 : }
289 :
290 : Datum
291 17650 : textnlike(PG_FUNCTION_ARGS)
292 : {
293 17650 : text *str = PG_GETARG_TEXT_PP(0);
294 17650 : text *pat = PG_GETARG_TEXT_PP(1);
295 : bool result;
296 : char *s,
297 : *p;
298 : int slen,
299 : plen;
300 :
301 17650 : s = VARDATA_ANY(str);
302 17650 : slen = VARSIZE_ANY_EXHDR(str);
303 17650 : p = VARDATA_ANY(pat);
304 17650 : plen = VARSIZE_ANY_EXHDR(pat);
305 :
306 17650 : result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
307 :
308 17650 : PG_RETURN_BOOL(result);
309 : }
310 :
311 : Datum
312 0 : bytealike(PG_FUNCTION_ARGS)
313 : {
314 0 : bytea *str = PG_GETARG_BYTEA_PP(0);
315 0 : bytea *pat = PG_GETARG_BYTEA_PP(1);
316 : bool result;
317 : char *s,
318 : *p;
319 : int slen,
320 : plen;
321 :
322 0 : s = VARDATA_ANY(str);
323 0 : slen = VARSIZE_ANY_EXHDR(str);
324 0 : p = VARDATA_ANY(pat);
325 0 : plen = VARSIZE_ANY_EXHDR(pat);
326 :
327 0 : result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
328 :
329 0 : PG_RETURN_BOOL(result);
330 : }
331 :
332 : Datum
333 0 : byteanlike(PG_FUNCTION_ARGS)
334 : {
335 0 : bytea *str = PG_GETARG_BYTEA_PP(0);
336 0 : bytea *pat = PG_GETARG_BYTEA_PP(1);
337 : bool result;
338 : char *s,
339 : *p;
340 : int slen,
341 : plen;
342 :
343 0 : s = VARDATA_ANY(str);
344 0 : slen = VARSIZE_ANY_EXHDR(str);
345 0 : p = VARDATA_ANY(pat);
346 0 : plen = VARSIZE_ANY_EXHDR(pat);
347 :
348 0 : result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
349 :
350 0 : PG_RETURN_BOOL(result);
351 : }
352 :
353 : /*
354 : * Case-insensitive versions
355 : */
356 :
357 : Datum
358 0 : nameiclike(PG_FUNCTION_ARGS)
359 : {
360 0 : Name str = PG_GETARG_NAME(0);
361 0 : text *pat = PG_GETARG_TEXT_PP(1);
362 : bool result;
363 : text *strtext;
364 :
365 0 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
366 : NameGetDatum(str)));
367 0 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
368 :
369 0 : PG_RETURN_BOOL(result);
370 : }
371 :
372 : Datum
373 0 : nameicnlike(PG_FUNCTION_ARGS)
374 : {
375 0 : Name str = PG_GETARG_NAME(0);
376 0 : text *pat = PG_GETARG_TEXT_PP(1);
377 : bool result;
378 : text *strtext;
379 :
380 0 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
381 : NameGetDatum(str)));
382 0 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
383 :
384 0 : PG_RETURN_BOOL(result);
385 : }
386 :
387 : Datum
388 8 : texticlike(PG_FUNCTION_ARGS)
389 : {
390 8 : text *str = PG_GETARG_TEXT_PP(0);
391 8 : text *pat = PG_GETARG_TEXT_PP(1);
392 : bool result;
393 :
394 8 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
395 :
396 8 : PG_RETURN_BOOL(result);
397 : }
398 :
399 : Datum
400 4 : texticnlike(PG_FUNCTION_ARGS)
401 : {
402 4 : text *str = PG_GETARG_TEXT_PP(0);
403 4 : text *pat = PG_GETARG_TEXT_PP(1);
404 : bool result;
405 :
406 4 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
407 :
408 4 : PG_RETURN_BOOL(result);
409 : }
410 :
411 : /*
412 : * like_escape() --- given a pattern and an ESCAPE string,
413 : * convert the pattern to use Postgres' standard backslash escape convention.
414 : */
415 : Datum
416 30 : like_escape(PG_FUNCTION_ARGS)
417 : {
418 30 : text *pat = PG_GETARG_TEXT_PP(0);
419 30 : text *esc = PG_GETARG_TEXT_PP(1);
420 : text *result;
421 :
422 30 : if (pg_database_encoding_max_length() == 1)
423 0 : result = SB_do_like_escape(pat, esc);
424 : else
425 30 : result = MB_do_like_escape(pat, esc);
426 :
427 30 : PG_RETURN_TEXT_P(result);
428 : }
429 :
430 : /*
431 : * like_escape_bytea() --- given a pattern and an ESCAPE string,
432 : * convert the pattern to use Postgres' standard backslash escape convention.
433 : */
434 : Datum
435 0 : like_escape_bytea(PG_FUNCTION_ARGS)
436 : {
437 0 : bytea *pat = PG_GETARG_BYTEA_PP(0);
438 0 : bytea *esc = PG_GETARG_BYTEA_PP(1);
439 0 : bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
440 :
441 0 : PG_RETURN_BYTEA_P((bytea *) result);
442 : }
|