LCOV - PostgreSQL - src/backend/utils/adt/like.c

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - like.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	78	134	58.2 %
Date:	2017-09-29 15:12:54	Functions:	10	16	62.5 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * like.c
       4             :  *    like expression handling code.
       5             :  *
       6             :  *   NOTES
       7             :  *      A big hack of the regexp.c code!! Contributed by
       8             :  *      Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
       9             :  *
      10             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *  src/backend/utils/adt/like.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include <ctype.h>
      21             : 
      22             : #include "catalog/pg_collation.h"
      23             : #include "mb/pg_wchar.h"
      24             : #include "miscadmin.h"
      25             : #include "utils/builtins.h"
      26             : #include "utils/pg_locale.h"
      27             : 
      28             : /*
                     :  * Result codes for the MatchText routines.  Every caller in this file
                     :  * compares a matcher's result against LIKE_TRUE only.  LIKE_FALSE and
                     :  * LIKE_ABORT are not referenced anywhere in this file; presumably they
                     :  * are returned by the code in like_match.c -- confirm there.
                     :  */
      29             : #define LIKE_TRUE                       1
      30             : #define LIKE_FALSE                      0
      31             : #define LIKE_ABORT                      (-1)
      32             : 
      33             : /*
                     :  * Forward declarations.  The SB_ (single-byte), MB_ (multibyte), UTF8_
                     :  * and SB_I (single-byte, case-insensitive) functions have no body in
                     :  * this file: MatchText and do_like_escape are #define'd to these names
                     :  * before each #include of like_match.c further down.  GenericMatchText
                     :  * and Generic_Text_IC_like are defined later in this file.
                     :  */
      34             : static int SB_MatchText(char *t, int tlen, char *p, int plen,
      35             :              pg_locale_t locale, bool locale_is_c);
      36             : static text *SB_do_like_escape(text *, text *);
      37             : 
      38             : static int MB_MatchText(char *t, int tlen, char *p, int plen,
      39             :              pg_locale_t locale, bool locale_is_c);
      40             : static text *MB_do_like_escape(text *, text *);
      41             : 
      42             : static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
      43             :                pg_locale_t locale, bool locale_is_c);
      44             : 
      45             : static int SB_IMatchText(char *t, int tlen, char *p, int plen,
      46             :               pg_locale_t locale, bool locale_is_c);
      47             : 
      48             : static int  GenericMatchText(char *s, int slen, char *p, int plen);
      49             : static int  Generic_Text_IC_like(text *str, text *pat, Oid collation);
      50             : 
      51             : /*--------------------
      52             :  * wchareq --- support routine for the multibyte MatchText; it is what
      53             :  * CHAREQ expands to for that variant.
                     :  *
                     :  * Compares the character at the start of p1 with the character at the
                     :  * start of p2, where one character may occupy several bytes.  Only that
                     :  * leading character is examined, not the rest of either string.
                     :  *
                     :  * Returns 1 if both characters have the same byte length (as reported
                     :  * by pg_mblen) and identical bytes, otherwise 0.
      54             :  *--------------------
      55             :  */
      56             : static inline int
      57         154 : wchareq(char *p1, char *p2)
      58             : {
      59             :     int         p1_len;
      60             : 
      61             :     /* Fast path: characters whose first bytes differ cannot be equal. */
      62         154 :     if (*p1 != *p2)
      63         116 :         return 0;
      64             : 
      65          38 :     p1_len = pg_mblen(p1);
      66          38 :     if (pg_mblen(p2) != p1_len)
      67           0 :         return 0;
      68             : 
      69             :     /* Same length: compare the p1_len bytes one at a time. */
      70         114 :     while (p1_len--)
      71             :     {
      72          38 :         if (*p1++ != *p2++)
      73           0 :             return 0;
      74             :     }
      75          38 :     return 1;
      76             : }
      77             : 
      78             : /*
      79             :  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
      80             :  * comparison of multibyte characters.  It did not work at all, however,
      81             :  * because it relied on tolower() which has a single-byte API ... and
      82             :  * towlower() wouldn't be much better since we have no suitably cheap way
      83             :  * of getting a single character transformed to the system's wchar_t format.
      84             :  * So now, we just downcase the strings using lower() and apply regular LIKE
      85             :  * comparison.  This should be revisited when we install better locale support.
      86             :  */
      87             : 
      88             : /*
      89             :  * We do handle case-insensitive matching for single-byte encodings using
      90             :  * fold-on-the-fly processing, however.
                     :  *
                     :  * SB_lower_char folds a single byte to lower case:
                     :  *   - locale_is_c set: pg_ascii_tolower()
                     :  *   - otherwise, when built with HAVE_LOCALE_T and locale is non-null:
                     :  *     tolower_l() using locale->info.lt
                     :  *   - otherwise: pg_tolower()
                     :  * It is invoked through the MATCH_LOWER macro defined further down for
                     :  * the SB_IMatchText variant.
      91             :  */
      92             : static char
      93           0 : SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
      94             : {
      95           0 :     if (locale_is_c)
      96           0 :         return pg_ascii_tolower(c);
      97             : #ifdef HAVE_LOCALE_T
      98           0 :     else if (locale)
      99           0 :         return tolower_l(c, locale->info.lt);
     100             : #endif
     101             :     else
     102           0 :         return pg_tolower(c);
     103             : }
     104             : 
     105             : /* Advance p by exactly one byte and reduce the remaining length plen by one. */
     106             : #define NextByte(p, plen)   ((p)++, (plen)--)
     107             : 
     108             : /*
                     :  * Set up to compile like_match.c for multibyte characters.
                     :  *
                     :  * like_match.c is #included four times below.  Before each inclusion
                     :  * the macros it is parameterized by are (re)defined, and MatchText /
                     :  * do_like_escape are mapped to the function names that inclusion
                     :  * should produce.
                     :  *
                     :  * NOTE(review): none of these macros is #undef'd in this file before it
                     :  * is defined again, so presumably like_match.c #undefs them at its end
                     :  * -- confirm there.
                     :  *
                     :  * In this variant a character is pg_mblen() bytes long: CHAREQ compares
                     :  * whole characters via wchareq(), NextChar skips one character, and
                     :  * CopyAdvChar copies one character from src to dst while reducing
                     :  * srclen by its byte length.
                     :  */
     109             : #define CHAREQ(p1, p2) wchareq((p1), (p2))
     110             : #define NextChar(p, plen) \
     111             :     do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
     112             : #define CopyAdvChar(dst, src, srclen) \
     113             :     do { int __l = pg_mblen(src); \
     114             :          (srclen) -= __l; \
     115             :          while (__l-- > 0) \
     116             :              *(dst)++ = *(src)++; \
     117             :        } while (0)
     118             : 
     119             : #define MatchText   MB_MatchText
     120             : #define do_like_escape  MB_do_like_escape
     121             : 
     122             : #include "like_match.c"
     123             : 
     124             : /*
                     :  * Set up to compile like_match.c for single-byte characters: every
                     :  * character is one byte, so the same three macros reduce to plain byte
                     :  * comparison, NextByte, and a one-byte copy.
                     :  */
     125             : #define CHAREQ(p1, p2) (*(p1) == *(p2))
     126             : #define NextChar(p, plen) NextByte((p), (plen))
     127             : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
     128             : 
     129             : #define MatchText   SB_MatchText
     130             : #define do_like_escape  SB_do_like_escape
     131             : 
     132             : #include "like_match.c"
     133             : 
     134             : /*
                     :  * Set up to compile like_match.c for single-byte case-insensitive
                     :  * matches.  MATCH_LOWER folds one byte with SB_lower_char(), using the
                     :  * locale and locale_is_c variables in scope at the point of expansion.
                     :  *
                     :  * NOTE(review): CHAREQ, CopyAdvChar and do_like_escape are not defined
                     :  * for this variant in this file; presumably like_match.c derives its
                     :  * comparison from MATCH_LOWER and omits do_like_escape when that macro
                     :  * is undefined -- confirm there.
                     :  */
     135             : #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
     136             : #define NextChar(p, plen) NextByte((p), (plen))
     137             : #define MatchText SB_IMatchText
     138             : 
     139             : #include "like_match.c"
     140             : 
     141             : /*
                     :  * Set up to compile like_match.c for UTF8 encoding, using a fast
                     :  * NextChar: step over one byte, then over any following bytes of the
                     :  * form 10xxxxxx (UTF-8 continuation bytes) while input remains.  This
                     :  * avoids calling pg_mblen().
                     :  *
                     :  * NOTE(review): only NextChar and MatchText are defined for this
                     :  * variant; how like_match.c compares characters here is not visible in
                     :  * this file -- confirm there.
                     :  */
     142             : 
     143             : #define NextChar(p, plen) \
     144             :     do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
     145             : #define MatchText   UTF8_MatchText
     146             : 
     147             : #include "like_match.c"
     148             : 
     149             : /*
                     :  * GenericMatchText --- generic matcher for all cases not requiring
                     :  * inline case-folding.
                     :  *
                     :  * s/slen are the string and its length, p/plen the pattern and its
                     :  * length.  The matcher is chosen by database encoding: SB_MatchText
                     :  * when pg_database_encoding_max_length() is 1, UTF8_MatchText when the
                     :  * encoding is PG_UTF8, and MB_MatchText otherwise.  The locale
                     :  * arguments are always passed as (0, true).
                     :  *
                     :  * Returns the chosen matcher's result unchanged; the callers in this
                     :  * file compare it against LIKE_TRUE.
                     :  */
     150             : static inline int
     151       48364 : GenericMatchText(char *s, int slen, char *p, int plen)
     152             : {
     153       48364 :     if (pg_database_encoding_max_length() == 1)
     154           0 :         return SB_MatchText(s, slen, p, plen, 0, true);
     155       48364 :     else if (GetDatabaseEncoding() == PG_UTF8)
     156       48364 :         return UTF8_MatchText(s, slen, p, plen, 0, true);
     157             :     else
     158           0 :         return MB_MatchText(s, slen, p, plen, 0, true);
     159             : }
     160             : /*
                     :  * Generic_Text_IC_like --- case-insensitive match of str against pat
                     :  * under the given collation.
                     :  *
                     :  * Returns the result of whichever matcher is used; the callers in this
                     :  * file compare it against LIKE_TRUE.
                     :  *
                     :  * Raises ERROR (ERRCODE_INDETERMINATE_COLLATION) when the collation is
                     :  * not C-ctype, is not the default collation, and is not a valid OID.
                     :  */
     161             : static inline int
     162          12 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
     163             : {
     164             :     char       *s,
     165             :                *p;
     166             :     int         slen,
     167             :                 plen;
     168          12 :     pg_locale_t locale = 0;
     169          12 :     bool        locale_is_c = false;
     170             : 
     171          12 :     if (lc_ctype_is_c(collation))
     172           0 :         locale_is_c = true;
     173          12 :     else if (collation != DEFAULT_COLLATION_OID)
     174             :     {
     175           0 :         if (!OidIsValid(collation))
     176             :         {
     177             :             /*
     178             :              * This typically means that the parser could not resolve a
     179             :              * conflict of implicit collations, so report it that way.
     180             :              */
     181           0 :             ereport(ERROR,
     182             :                     (errcode(ERRCODE_INDETERMINATE_COLLATION),
     183             :                      errmsg("could not determine which collation to use for ILIKE"),
     184             :                      errhint("Use the COLLATE clause to set the collation explicitly.")));
     185             :         }
     186           0 :         locale = pg_newlocale_from_collation(collation);
     187             :     }
     188             : 
     189             :     /*
     190             :      * For efficiency reasons, in the single byte case we don't call lower()
     191             :      * on the pattern and text, but instead call SB_lower_char on each
     192             :      * character.  In the multi-byte case we don't have much choice :-(. Also,
     193             :      * ICU does not support single-character case folding, so we go the long
     194             :      * way.
     195             :      */
     196             : 
     197          12 :     if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
     198             :     {
     199             :         /*
                     :          * Fold both pattern and string with lower() under the given
                     :          * collation, then run the ordinary case-sensitive matcher on the
                     :          * folded copies (hence the (0, true) locale arguments).  The
                     :          * results are read with the *_ANY accessors, which per postgres.h
                     :          * accept either varlena header format, so nothing here depends on
                     :          * whether lower() returns a packed datum.
                     :          */
     200          12 :         pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     201             :                                                      PointerGetDatum(pat)));
     202          12 :         p = VARDATA_ANY(pat);
     203          12 :         plen = VARSIZE_ANY_EXHDR(pat);
     204          12 :         str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
     205             :                                                      PointerGetDatum(str)));
     206          12 :         s = VARDATA_ANY(str);
     207          12 :         slen = VARSIZE_ANY_EXHDR(str);
     208          12 :         if (GetDatabaseEncoding() == PG_UTF8)
     209          12 :             return UTF8_MatchText(s, slen, p, plen, 0, true);
     210             :         else
     211           0 :             return MB_MatchText(s, slen, p, plen, 0, true);
     212             :     }
     213             :     else
     214             :     {
     215           0 :         p = VARDATA_ANY(pat);
     216           0 :         plen = VARSIZE_ANY_EXHDR(pat);
     217           0 :         s = VARDATA_ANY(str);
     218           0 :         slen = VARSIZE_ANY_EXHDR(str);
     219           0 :         return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
     220             :     }
     221             : }
     222             : 
     223             : /*
     224             :  *  interface routines called by the function manager
     225             :  */
     226             : /*
                     :  * namelike --- match a name (argument 0) against a text pattern
                     :  * (argument 1).  The name is treated as a NUL-terminated string: its
                     :  * length is taken with strlen().  Returns true only when
                     :  * GenericMatchText reports LIKE_TRUE.
                     :  */
     227             : Datum
     228       17428 : namelike(PG_FUNCTION_ARGS)
     229             : {
     230       17428 :     Name        str = PG_GETARG_NAME(0);
     231       17428 :     text       *pat = PG_GETARG_TEXT_PP(1);
     232             :     bool        result;
     233             :     char       *s,
     234             :                *p;
     235             :     int         slen,
     236             :                 plen;
     237             : 
     238       17428 :     s = NameStr(*str);
     239       17428 :     slen = strlen(s);
     240       17428 :     p = VARDATA_ANY(pat);
     241       17428 :     plen = VARSIZE_ANY_EXHDR(pat);
     242             : 
     243       17428 :     result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
     244             : 
     245       17428 :     PG_RETURN_BOOL(result);
     246             : }
     247             : /*
                     :  * namenlike --- negated form of namelike: same arguments and same call
                     :  * to GenericMatchText, but returns true for every result other than
                     :  * LIKE_TRUE.
                     :  */
     248             : Datum
     249         598 : namenlike(PG_FUNCTION_ARGS)
     250             : {
     251         598 :     Name        str = PG_GETARG_NAME(0);
     252         598 :     text       *pat = PG_GETARG_TEXT_PP(1);
     253             :     bool        result;
     254             :     char       *s,
     255             :                *p;
     256             :     int         slen,
     257             :                 plen;
     258             : 
     259         598 :     s = NameStr(*str);
     260         598 :     slen = strlen(s);
     261         598 :     p = VARDATA_ANY(pat);
     262         598 :     plen = VARSIZE_ANY_EXHDR(pat);
     263             : 
     264         598 :     result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
     265             : 
     266         598 :     PG_RETURN_BOOL(result);
     267             : }
     268             : /*
                     :  * textlike --- match a text value (argument 0) against a text pattern
                     :  * (argument 1).  Data pointers and lengths come from VARDATA_ANY and
                     :  * VARSIZE_ANY_EXHDR.  Returns true only when GenericMatchText reports
                     :  * LIKE_TRUE.
                     :  */
     269             : Datum
     270       12688 : textlike(PG_FUNCTION_ARGS)
     271             : {
     272       12688 :     text       *str = PG_GETARG_TEXT_PP(0);
     273       12688 :     text       *pat = PG_GETARG_TEXT_PP(1);
     274             :     bool        result;
     275             :     char       *s,
     276             :                *p;
     277             :     int         slen,
     278             :                 plen;
     279             : 
     280       12688 :     s = VARDATA_ANY(str);
     281       12688 :     slen = VARSIZE_ANY_EXHDR(str);
     282       12688 :     p = VARDATA_ANY(pat);
     283       12688 :     plen = VARSIZE_ANY_EXHDR(pat);
     284             : 
     285       12688 :     result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
     286             : 
     287       12688 :     PG_RETURN_BOOL(result);
     288             : }
     289             : /*
                     :  * textnlike --- negated form of textlike: same arguments and same call
                     :  * to GenericMatchText, but returns true for every result other than
                     :  * LIKE_TRUE.
                     :  */
     290             : Datum
     291       17650 : textnlike(PG_FUNCTION_ARGS)
     292             : {
     293       17650 :     text       *str = PG_GETARG_TEXT_PP(0);
     294       17650 :     text       *pat = PG_GETARG_TEXT_PP(1);
     295             :     bool        result;
     296             :     char       *s,
     297             :                *p;
     298             :     int         slen,
     299             :                 plen;
     300             : 
     301       17650 :     s = VARDATA_ANY(str);
     302       17650 :     slen = VARSIZE_ANY_EXHDR(str);
     303       17650 :     p = VARDATA_ANY(pat);
     304       17650 :     plen = VARSIZE_ANY_EXHDR(pat);
     305             : 
     306       17650 :     result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
     307             : 
     308       17650 :     PG_RETURN_BOOL(result);
     309             : }
     310             : /*
                     :  * bytealike --- match a bytea value (argument 0) against a bytea
                     :  * pattern (argument 1).  The single-byte matcher SB_MatchText is called
                     :  * directly, without consulting the database encoding, with locale
                     :  * arguments (0, true).  Returns true only for LIKE_TRUE.
                     :  */
     311             : Datum
     312           0 : bytealike(PG_FUNCTION_ARGS)
     313             : {
     314           0 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     315           0 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     316             :     bool        result;
     317             :     char       *s,
     318             :                *p;
     319             :     int         slen,
     320             :                 plen;
     321             : 
     322           0 :     s = VARDATA_ANY(str);
     323           0 :     slen = VARSIZE_ANY_EXHDR(str);
     324           0 :     p = VARDATA_ANY(pat);
     325           0 :     plen = VARSIZE_ANY_EXHDR(pat);
     326             : 
     327           0 :     result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
     328             : 
     329           0 :     PG_RETURN_BOOL(result);
     330             : }
     331             : /*
                     :  * byteanlike --- negated form of bytealike: same arguments and same
                     :  * direct call to SB_MatchText, but returns true for every result other
                     :  * than LIKE_TRUE.
                     :  */
     332             : Datum
     333           0 : byteanlike(PG_FUNCTION_ARGS)
     334             : {
     335           0 :     bytea      *str = PG_GETARG_BYTEA_PP(0);
     336           0 :     bytea      *pat = PG_GETARG_BYTEA_PP(1);
     337             :     bool        result;
     338             :     char       *s,
     339             :                *p;
     340             :     int         slen,
     341             :                 plen;
     342             : 
     343           0 :     s = VARDATA_ANY(str);
     344           0 :     slen = VARSIZE_ANY_EXHDR(str);
     345           0 :     p = VARDATA_ANY(pat);
     346           0 :     plen = VARSIZE_ANY_EXHDR(pat);
     347             : 
     348           0 :     result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
     349             : 
     350           0 :     PG_RETURN_BOOL(result);
     351             : }
     352             : 
     353             : /*
     354             :  * Case-insensitive versions
     355             :  */
     356             : /*
                     :  * nameiclike --- case-insensitive match of a name (argument 0) against
                     :  * a text pattern (argument 1).  The name is first converted to text by
                     :  * calling name_text, then handed to Generic_Text_IC_like together with
                     :  * the call's collation.  Returns true only for LIKE_TRUE.
                     :  */
     357             : Datum
     358           0 : nameiclike(PG_FUNCTION_ARGS)
     359             : {
     360           0 :     Name        str = PG_GETARG_NAME(0);
     361           0 :     text       *pat = PG_GETARG_TEXT_PP(1);
     362             :     bool        result;
     363             :     text       *strtext;
     364             : 
     365           0 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     366             :                                                  NameGetDatum(str)));
     367           0 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     368             : 
     369           0 :     PG_RETURN_BOOL(result);
     370             : }
     371             : /*
                     :  * nameicnlike --- negated form of nameiclike: same conversion through
                     :  * name_text and same call to Generic_Text_IC_like, but returns true for
                     :  * every result other than LIKE_TRUE.
                     :  */
     372             : Datum
     373           0 : nameicnlike(PG_FUNCTION_ARGS)
     374             : {
     375           0 :     Name        str = PG_GETARG_NAME(0);
     376           0 :     text       *pat = PG_GETARG_TEXT_PP(1);
     377             :     bool        result;
     378             :     text       *strtext;
     379             : 
     380           0 :     strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
     381             :                                                  NameGetDatum(str)));
     382           0 :     result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     383             : 
     384           0 :     PG_RETURN_BOOL(result);
     385             : }
     386             : /*
                     :  * texticlike --- case-insensitive match of a text value (argument 0)
                     :  * against a text pattern (argument 1), delegated to
                     :  * Generic_Text_IC_like with the call's collation.  Returns true only
                     :  * for LIKE_TRUE.
                     :  */
     387             : Datum
     388           8 : texticlike(PG_FUNCTION_ARGS)
     389             : {
     390           8 :     text       *str = PG_GETARG_TEXT_PP(0);
     391           8 :     text       *pat = PG_GETARG_TEXT_PP(1);
     392             :     bool        result;
     393             : 
     394           8 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
     395             : 
     396           8 :     PG_RETURN_BOOL(result);
     397             : }
     398             : /*
                     :  * texticnlike --- negated form of texticlike: same call to
                     :  * Generic_Text_IC_like, but returns true for every result other than
                     :  * LIKE_TRUE.
                     :  */
     399             : Datum
     400           4 : texticnlike(PG_FUNCTION_ARGS)
     401             : {
     402           4 :     text       *str = PG_GETARG_TEXT_PP(0);
     403           4 :     text       *pat = PG_GETARG_TEXT_PP(1);
     404             :     bool        result;
     405             : 
     406           4 :     result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
     407             : 
     408           4 :     PG_RETURN_BOOL(result);
     409             : }
     410             : 
     411             : /*
     412             :  * like_escape() --- given a pattern and an ESCAPE string,
     413             :  * convert the pattern to use Postgres' standard backslash escape convention.
                     :  *
                     :  * Argument 0 is the pattern and argument 1 the escape string.  The
                     :  * conversion itself is done by SB_do_like_escape when
                     :  * pg_database_encoding_max_length() is 1 and by MB_do_like_escape for
                     :  * every other encoding (UTF8 included); the resulting text is returned.
     414             :  */
     415             : Datum
     416          30 : like_escape(PG_FUNCTION_ARGS)
     417             : {
     418          30 :     text       *pat = PG_GETARG_TEXT_PP(0);
     419          30 :     text       *esc = PG_GETARG_TEXT_PP(1);
     420             :     text       *result;
     421             : 
     422          30 :     if (pg_database_encoding_max_length() == 1)
     423           0 :         result = SB_do_like_escape(pat, esc);
     424             :     else
     425          30 :         result = MB_do_like_escape(pat, esc);
     426             : 
     427          30 :     PG_RETURN_TEXT_P(result);
     428             : }
     429             : 
     430             : /*
     431             :  * like_escape_bytea() --- given a pattern and an ESCAPE string,
     432             :  * convert the pattern to use Postgres' standard backslash escape convention.
                     :  *
                     :  * bytea variant of like_escape(): both arguments are cast to text * and
                     :  * passed to SB_do_like_escape unconditionally, without consulting the
                     :  * database encoding.  The result is returned as bytea.
     433             :  */
     434             : Datum
     435           0 : like_escape_bytea(PG_FUNCTION_ARGS)
     436             : {
     437           0 :     bytea      *pat = PG_GETARG_BYTEA_PP(0);
     438           0 :     bytea      *esc = PG_GETARG_BYTEA_PP(1);
     439           0 :     bytea      *result = SB_do_like_escape((text *) pat, (text *) esc);
     440             : 
     441           0 :     PG_RETURN_BYTEA_P((bytea *) result);
     442             : }

Generated by: LCOV version 1.11