LCOV - PostgreSQL - src/backend/utils/adt/encode.c

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - encode.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	105	211	49.8 %
Date:	2017-09-29 13:40:31	Functions:	11	16	68.8 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * encode.c
       4             :  *    Various data encoding/decoding things.
       5             :  *
       6             :  * Copyright (c) 2001-2017, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/encode.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include <ctype.h>
      17             : 
      18             : #include "utils/builtins.h"
      19             : 
      20             : /* Callback table for one named encoding: two length estimators plus the encode/decode workers. */
      21             : struct pg_encoding
      22             : {
      23             :     unsigned    (*encode_len) (const char *data, unsigned dlen);    /* output size the caller allocates before encode */
      24             :     unsigned    (*decode_len) (const char *data, unsigned dlen);    /* output size the caller allocates before decode */
      25             :     unsigned    (*encode) (const char *data, unsigned dlen, char *res); /* writes into res, returns the count written */
      26             :     unsigned    (*decode) (const char *data, unsigned dlen, char *res); /* writes into res, returns the count written */
      27             : };
      28             : /* Forward declaration; the definition follows the enclist table that it searches. */
      29             : static const struct pg_encoding *pg_find_encoding(const char *name);
      30             : 
      31             : /*
      32             :  * SQL functions.
      33             :  */
      34             : /* SQL-callable: argument 0 is a bytea value, argument 1 the encoding name as text; returns the value encoded as text, or raises ERROR when the name is not in enclist. */
      35             : Datum
      36           3 : binary_encode(PG_FUNCTION_ARGS)
      37             : {
      38           3 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
      39           3 :     Datum       name = PG_GETARG_DATUM(1);
      40             :     text       *result;
      41             :     char       *namebuf;
      42             :     int         datalen,
      43             :                 resultlen,
      44             :                 res;
      45             :     const struct pg_encoding *enc;
      46             : /* NOTE(review): the callbacks take and return unsigned, but the lengths are held in int here; confirm that an estimate above INT_MAX cannot occur. */
      47           3 :     datalen = VARSIZE_ANY_EXHDR(data);
      48             : 
      49           3 :     namebuf = TextDatumGetCString(name);
      50             : 
      51           3 :     enc = pg_find_encoding(namebuf);
      52           3 :     if (enc == NULL)
      53           0 :         ereport(ERROR,
      54             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      55             :                  errmsg("unrecognized encoding: \"%s\"", namebuf)));
      56             : /* Allocate from the estimate first, then let the encoder fill the payload area. */
      57           3 :     resultlen = enc->encode_len(VARDATA_ANY(data), datalen);
      58           3 :     result = palloc(VARHDRSZ + resultlen);
      59             : 
      60           3 :     res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result));
      61             : 
      62             :     /* The encoder wrote more than was allocated, so memory is already damaged: report FATAL rather than ERROR. */
      63           3 :     if (res > resultlen)
      64           0 :         elog(FATAL, "overflow - encode estimate too small");
      65             : /* The stored size reflects what was actually written, which may be less than the estimate. */
      66           3 :     SET_VARSIZE(result, VARHDRSZ + res);
      67             : 
      68           3 :     PG_RETURN_TEXT_P(result);
      69             : }
      70             : /* SQL-callable: argument 0 is the encoded text, argument 1 the encoding name as text; returns the decoded bytea, or raises ERROR when the name is not in enclist. */
      71             : Datum
      72           8 : binary_decode(PG_FUNCTION_ARGS)
      73             : {
      74           8 :     text       *data = PG_GETARG_TEXT_PP(0);
      75           8 :     Datum       name = PG_GETARG_DATUM(1);
      76             :     bytea      *result;
      77             :     char       *namebuf;
      78             :     int         datalen,
      79             :                 resultlen,
      80             :                 res;
      81             :     const struct pg_encoding *enc;
      82             : /* NOTE(review): the callbacks take and return unsigned, but the lengths are held in int here; confirm that an estimate above INT_MAX cannot occur. */
      83           8 :     datalen = VARSIZE_ANY_EXHDR(data);
      84             : 
      85           8 :     namebuf = TextDatumGetCString(name);
      86             : 
      87           8 :     enc = pg_find_encoding(namebuf);
      88           8 :     if (enc == NULL)
      89           0 :         ereport(ERROR,
      90             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      91             :                  errmsg("unrecognized encoding: \"%s\"", namebuf)));
      92             : /* Allocate from the estimate first, then let the decoder fill the payload area. */
      93           8 :     resultlen = enc->decode_len(VARDATA_ANY(data), datalen);
      94           8 :     result = palloc(VARHDRSZ + resultlen);
      95             : 
      96           8 :     res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result));
      97             : 
      98             :     /* The decoder wrote more than was allocated, so memory is already damaged: report FATAL rather than ERROR. */
      99           8 :     if (res > resultlen)
     100           0 :         elog(FATAL, "overflow - decode estimate too small");
     101             : /* The stored size reflects what was actually written, which may be less than the estimate. */
     102           8 :     SET_VARSIZE(result, VARHDRSZ + res);
     103             : 
     104           8 :     PG_RETURN_BYTEA_P(result);
     105             : }
     106             : 
     107             : 
     108             : /*
     109             :  * HEX
     110             :  */
     111             : /* hextbl[v] is the lowercase hex digit for nibble value v (0..15). */
     112             : static const char hextbl[] = "0123456789abcdef";
     113             : /* hexlookup[c] is the value of character code c (0..127) as a hex digit, accepting both cases, or -1 when c is not a hex digit. */
     114             : static const int8 hexlookup[128] = {
     115             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     116             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     117             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     118             :     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,   /* '0'..'9' */
     119             :     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 'A'..'F' */
     120             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     121             :     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 'a'..'f' */
     122             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     123             : };
     124             : /* Write the lowercase hex form of the len bytes at src into dst, two characters per byte with the high nibble first and no terminator; returns len * 2. */
     125             : unsigned
     126          34 : hex_encode(const char *src, unsigned len, char *dst)
     127             : {
     128          34 :     const char *end = src + len;
     129             : 
     130         344 :     while (src < end)
     131             :     {
     132         276 :         *dst++ = hextbl[(*src >> 4) & 0xF];  /* the mask keeps the index in 0..15 whatever the signedness of char */
     133         276 :         *dst++ = hextbl[*src & 0xF];
     134         276 :         src++;
     135             :     }
     136          34 :     return len * 2;
     137             : }
     138             : /* Return the value (0..15) of hex digit c using hexlookup; raises ERROR for any character that is not a hex digit. */
     139             : static inline char
     140          85 : get_hex(char c)
     141             : {
     142          85 :     int         res = -1;
     143             : /* Only codes 1..126 are looked up; any other value leaves res at -1 and is rejected below. */
     144          85 :     if (c > 0 && c < 127)
     145          85 :         res = hexlookup[(unsigned char) c];
     146             : 
     147          85 :     if (res < 0)
     148           1 :         ereport(ERROR,
     149             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     150             :                  errmsg("invalid hexadecimal digit: \"%c\"", c)));
     151             : 
     152          84 :     return (char) res;
     153             : }
     154             : /* Decode the hex text src[0..len) into bytes at dst and return the number of bytes written. Space, tab, newline and carriage return are skipped in front of a digit pair; ERROR on a non-hex character or a trailing single digit. */
     155             : unsigned
     156          13 : hex_decode(const char *src, unsigned len, char *dst)
     157             : {
     158             :     const char *s,
     159             :                *srcend;
     160             :     char        v1,
     161             :                 v2,
     162             :                *p;
     163             : 
     164          13 :     srcend = src + len;
     165          13 :     s = src;
     166          13 :     p = dst;
     167          77 :     while (s < srcend)
     168             :     {
     169          53 :         if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
     170             :         {
     171          10 :             s++;
     172          10 :             continue;
     173             :         }
     174          43 :         v1 = get_hex(*s++) << 4;    /* high nibble */
     175          43 :         if (s >= srcend)
     176           1 :             ereport(ERROR,
     177             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     178             :                      errmsg("invalid hexadecimal data: odd number of digits")));
     179             : /* Whitespace is not skipped between the two digits of a pair; get_hex rejects it here. */
     180          42 :         v2 = get_hex(*s++);
     181          41 :         *p++ = v1 | v2;
     182             :     }
     183             : 
     184          11 :     return p - dst;
     185             : }
     186             : /* encode_len callback paired with hex_encode in enclist: two output characters per input byte; src is not examined. */
     187             : static unsigned
     188           0 : hex_enc_len(const char *src, unsigned srclen)
     189             : {
     190           0 :     return srclen << 1;
     191             : }
     192             : /* decode_len callback paired with hex_decode in enclist: srclen / 2 rounded down; src is not examined, so whitespace skipped by hex_decode only makes the real result shorter. */
     193             : static unsigned
     194           4 : hex_dec_len(const char *src, unsigned srclen)
     195             : {
     196           4 :     return srclen >> 1;
     197             : }
     198             : 
     199             : /*
     200             :  * BASE64
     201             :  */
     202             : /* _base64[v] is the output character for 6-bit value v (0..63). */
     203             : static const char _base64[] =
     204             : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     205             : /* Inverse of _base64, indexed by character code 0..127: the 6-bit value of the character, or -1 when it is not in the alphabet (the padding character '=' maps to -1). */
     206             : static const int8 b64lookup[128] = {
     207             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     208             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     209             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, /* '+' and '/' */
     210             :     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, /* '0'..'9' */
     211             :     -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,   /* 'A'..'O' */
     212             :     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 'P'..'Z' */
     213             :     -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 'a'..'o' */
     214             :     41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, /* 'p'..'z' */
     215             : };
     216             : /* Base64-encode the len bytes at src into dst, writing a '\n' each time 76 characters have been produced on the current line and padding a short final group with '='; returns the number of characters written (no terminator). */
     217             : static unsigned
     218           0 : b64_encode(const char *src, unsigned len, char *dst)
     219             : {
     220             :     char       *p,
     221           0 :                *lend = dst + 76;    /* output position at which the next line break is due */
     222             :     const char *s,
     223           0 :                *end = src + len;
     224           0 :     int         pos = 2;    /* slot of the next byte within the 3-byte group: 2, 1, then 0 */
     225           0 :     uint32      buf = 0;    /* the group being collected; the first byte lands in bits 16..23 */
     226             : 
     227           0 :     s = src;
     228           0 :     p = dst;
     229             : 
     230           0 :     while (s < end)
     231             :     {
     232           0 :         buf |= (unsigned char) *s << (pos << 3);
     233           0 :         pos--;
     234           0 :         s++;
     235             : 
     236             :         /* three bytes collected: emit them as four 6-bit symbols */
     237           0 :         if (pos < 0)
     238             :         {
     239           0 :             *p++ = _base64[(buf >> 18) & 0x3f];
     240           0 :             *p++ = _base64[(buf >> 12) & 0x3f];
     241           0 :             *p++ = _base64[(buf >> 6) & 0x3f];
     242           0 :             *p++ = _base64[buf & 0x3f];
     243             : 
     244           0 :             pos = 2;
     245           0 :             buf = 0;
     246             :         }
     247           0 :         if (p >= lend)  /* tested after every input byte, so the break follows a full line even when no input remains */
     248             :         {
     249           0 :             *p++ = '\n';
     250           0 :             lend = p + 76;
     251             :         }
     252             :     }
     253           0 :     if (pos != 2)   /* one or two bytes are still pending */
     254             :     {
     255           0 :         *p++ = _base64[(buf >> 18) & 0x3f];
     256           0 :         *p++ = _base64[(buf >> 12) & 0x3f];
     257           0 :         *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';  /* the third symbol carries data only when two bytes were pending */
     258           0 :         *p++ = '=';
     259             :     }
     260             : 
     261           0 :     return p - dst;
     262             : }
     263             : /* Decode the base64 text src[0..len) into dst and return the number of bytes written. Space, tab, newline and carriage return are ignored; ERROR on any other character outside the alphabet, on a first '=' at position 0 or 1 of a group, or when the input stops in the middle of a group. */
     264             : static unsigned
     265           0 : b64_decode(const char *src, unsigned len, char *dst)
     266             : {
     267           0 :     const char *srcend = src + len,
     268           0 :                *s = src;
     269           0 :     char       *p = dst;
     270             :     char        c;
     271           0 :     int         b = 0;      /* 6-bit value of the current symbol */
     272           0 :     uint32      buf = 0;    /* bits of the group being collected */
     273           0 :     int         pos = 0,    /* symbols collected in the current group */
     274           0 :                 end = 0;    /* 0 = no padding seen, 1 = first '=' at position 2, 2 = first '=' at position 3 */
     275             : 
     276           0 :     while (s < srcend)
     277             :     {
     278           0 :         c = *s++;
     279             : 
     280           0 :         if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
     281           0 :             continue;
     282             : 
     283           0 :         if (c == '=')
     284             :         {
     285             :             /* padding: remember where the first '=' appeared; it contributes zero bits */
     286           0 :             if (!end)
     287             :             {
     288           0 :                 if (pos == 2)
     289           0 :                     end = 1;
     290           0 :                 else if (pos == 3)
     291           0 :                     end = 2;
     292             :                 else
     293           0 :                     ereport(ERROR,
     294             :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     295             :                              errmsg("unexpected \"=\" while decoding base64 sequence")));
     296             :             }
     297           0 :             b = 0;
     298             :         }
     299             :         else
     300             :         {
     301           0 :             b = -1;
     302           0 :             if (c > 0 && c < 127)   /* only codes 1..126 are looked up */
     303           0 :                 b = b64lookup[(unsigned char) c];
     304           0 :             if (b < 0)
     305           0 :                 ereport(ERROR,
     306             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     307             :                          errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c)));
     308             :         }
     309             :         /* append the 6 bits; a complete group of four symbols yields up to three bytes */
     310           0 :         buf = (buf << 6) + b;
     311           0 :         pos++;
     312           0 :         if (pos == 4)
     313             :         {
     314           0 :             *p++ = (buf >> 16) & 255;
     315           0 :             if (end == 0 || end > 1)    /* second byte, unless padding began at position 2 */
     316           0 :                 *p++ = (buf >> 8) & 255;
     317           0 :             if (end == 0 || end > 2)    /* third byte only without padding; end is never set above 2 in this function */
     318           0 :                 *p++ = buf & 255;
     319           0 :             buf = 0;
     320           0 :             pos = 0;    /* NOTE(review): end is not reset, so a group following a padded one is emitted with the same truncation - confirm this is intended */
     321             :         }
     322             :     }
     323             : /* Leftover symbols mean the input stopped in the middle of a four-symbol group. */
     324           0 :     if (pos != 0)
     325           0 :         ereport(ERROR,
     326             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     327             :                  errmsg("invalid base64 end sequence"),
     328             :                  errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
     329             : 
     330           0 :     return p - dst;
     331             : }
     332             : 
     333             : /* encode_len callback paired with b64_encode in enclist: estimated output size for srclen input bytes; src is not examined. NOTE(review): (srclen + 2) * 4 is unsigned arithmetic and wraps for srclen close to UINT_MAX / 4 - confirm such lengths cannot reach this function. */
     334             : static unsigned
     335           0 : b64_enc_len(const char *src, unsigned srclen)
     336             : {
     337             :     /* 3 bytes become 4 characters (+2 so that a partial final group is counted); 76 * 3 / 4 = 57 input bytes fill one 76-character line, each costing one linefeed */
     338           0 :     return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
     339             : }
     340             : /* decode_len callback paired with b64_decode in enclist: three output bytes per four input characters, rounded down; src is not examined. NOTE(review): srclen * 3 is unsigned arithmetic and wraps for srclen above UINT_MAX / 3 - confirm such lengths cannot reach this function. */
     341             : static unsigned
     342           0 : b64_dec_len(const char *src, unsigned srclen)
     343             : {
     344           0 :     return (srclen * 3) >> 2;
     345             : }
     346             : 
     347             : /*
     348             :  * Escape
     349             :  * Minimally escape bytea to text.
     350             :  * De-escape text to bytea.
     351             :  *
     352             :  * We must escape zero bytes and high-bit-set bytes to avoid generating
     353             :  * text that might be invalid in the current encoding, or that might
     354             :  * change to something else if passed through an encoding conversion
     355             :  * (leading to failing to de-escape to the original bytea value).
     356             :  * Also of course backslash itself has to be escaped.
     357             :  *
     358             :  * De-escaping processes \\ and any \### octal
     359             :  */
     360             : /* VAL(CH): numeric value of digit character CH. DIG(VAL): digit character for a numeric value; used below only with octal digits 0..7. */
     361             : #define VAL(CH)         ((CH) - '0')
     362             : #define DIG(VAL)        ((VAL) + '0')
     363             : /* Escape the srclen bytes at src into dst: a zero byte or a byte for which IS_HIGHBIT_SET holds becomes a backslash plus three octal digits, a backslash is doubled, and every other byte is copied unchanged. Returns the number of characters written (no terminator). */
     364             : static unsigned
     365           3 : esc_encode(const char *src, unsigned srclen, char *dst)
     366             : {
     367           3 :     const char *end = src + srclen;
     368           3 :     char       *rp = dst;
     369           3 :     int         len = 0;    /* running count of characters written, kept in step with rp */
     370             : 
     371          29 :     while (src < end)
     372             :     {
     373          23 :         unsigned char c = (unsigned char) *src;
     374             : 
     375          23 :         if (c == '\0' || IS_HIGHBIT_SET(c))
     376             :         {
     377           2 :             rp[0] = '\\';
     378           2 :             rp[1] = DIG(c >> 6);            /* top two bits: 0..3 */
     379           2 :             rp[2] = DIG((c >> 3) & 7);      /* middle three bits */
     380           2 :             rp[3] = DIG(c & 7);             /* low three bits */
     381           2 :             rp += 4;
     382           2 :             len += 4;
     383             :         }
     384          21 :         else if (c == '\\')
     385             :         {
     386           0 :             rp[0] = '\\';
     387           0 :             rp[1] = '\\';
     388           0 :             rp += 2;
     389           0 :             len += 2;
     390             :         }
     391             :         else
     392             :         {
     393          21 :             *rp++ = c;
     394          21 :             len++;
     395             :         }
     396             : 
     397          23 :         src++;
     398             :     }
     399             : 
     400           3 :     return len;
     401             : }
     402             : /* De-escape src[0..srclen) into dst: plain bytes are copied, a backslash followed by three octal digits (first digit 0..3) becomes that byte, and a doubled backslash becomes one backslash; ERROR on any other use of backslash. Returns the number of bytes written. */
     403             : static unsigned
     404           4 : esc_decode(const char *src, unsigned srclen, char *dst)
     405             : {
     406           4 :     const char *end = src + srclen;
     407           4 :     char       *rp = dst;
     408           4 :     int         len = 0;    /* output bytes so far; each loop iteration produces exactly one */
     409             : 
     410      400008 :     while (src < end)
     411             :     {
     412      400000 :         if (src[0] != '\\')
     413      400000 :             *rp++ = *src++;
     414           0 :         else if (src + 3 < end &&  /* src[1..3] must lie inside the input */
     415           0 :                  (src[1] >= '0' && src[1] <= '3') &&
     416           0 :                  (src[2] >= '0' && src[2] <= '7') &&
     417           0 :                  (src[3] >= '0' && src[3] <= '7'))
     418           0 :         {
     419             :             int         val;
     420             : 
     421           0 :             val = VAL(src[1]);
     422           0 :             val <<= 3;
     423           0 :             val += VAL(src[2]);
     424           0 :             val <<= 3;
     425           0 :             *rp++ = val + VAL(src[3]);
     426           0 :             src += 4;
     427             :         }
     428           0 :         else if (src + 1 < end &&
     429           0 :                  (src[1] == '\\'))
     430             :         {
     431           0 :             *rp++ = '\\';
     432           0 :             src += 2;
     433             :         }
     434             :         else
     435             :         {
     436             :             /*
     437             :              * One backslash, not followed by ### valid octal. Should never
     438             :              * get here, since esc_dec_len does same check.
     439             :              */
     440           0 :             ereport(ERROR,
     441             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     442             :                      errmsg("invalid input syntax for type %s", "bytea")));
     443             :         }
     444             : 
     445      400000 :         len++;
     446             :     }
     447             : 
     448           4 :     return len;
     449             : }
     450             : /* encode_len callback paired with esc_encode in enclist: scans the input and adds 4 for a zero byte or one for which IS_HIGHBIT_SET holds, 2 for a backslash and 1 for any other byte, the same sizes esc_encode writes. */
     451             : static unsigned
     452           3 : esc_enc_len(const char *src, unsigned srclen)
     453             : {
     454           3 :     const char *end = src + srclen;
     455           3 :     int         len = 0;
     456             : 
     457          29 :     while (src < end)
     458             :     {
     459          23 :         if (*src == '\0' || IS_HIGHBIT_SET(*src))
     460           2 :             len += 4;   /* backslash plus three octal digits */
     461          21 :         else if (*src == '\\')
     462           0 :             len += 2;   /* doubled backslash */
     463             :         else
     464          21 :             len++;
     465             : 
     466          23 :         src++;
     467             :     }
     468             : 
     469           3 :     return len;
     470             : }
     471             : /* decode_len callback paired with esc_decode in enclist: walks the input with the same escape grammar and counts one output byte per plain byte, octal escape or doubled backslash; raises ERROR on a malformed escape. */
     472             : static unsigned
     473           4 : esc_dec_len(const char *src, unsigned srclen)
     474             : {
     475           4 :     const char *end = src + srclen;
     476           4 :     int         len = 0;
     477             : 
     478      400008 :     while (src < end)
     479             :     {
     480      400000 :         if (src[0] != '\\')
     481      400000 :             src++;
     482           0 :         else if (src + 3 < end &&  /* src[1..3] must lie inside the input */
     483           0 :                  (src[1] >= '0' && src[1] <= '3') &&
     484           0 :                  (src[2] >= '0' && src[2] <= '7') &&
     485           0 :                  (src[3] >= '0' && src[3] <= '7'))
     486             :         {
     487             :             /*
     488             :              * backslash + valid octal
     489             :              */
     490           0 :             src += 4;
     491             :         }
     492           0 :         else if (src + 1 < end &&
     493           0 :                  (src[1] == '\\'))
     494             :         {
     495             :             /*
     496             :              * two backslashes = backslash
     497             :              */
     498           0 :             src += 2;
     499             :         }
     500             :         else
     501             :         {
     502             :             /*
     503             :              * one backslash, not followed by ### valid octal
     504             :              */
     505           0 :             ereport(ERROR,
     506             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     507             :                      errmsg("invalid input syntax for type %s", "bytea")));
     508             :         }
     509             : 
     510      400000 :         len++;  /* every accepted unit yields exactly one output byte */
     511             :     }
     512           4 :     return len;
     513             : }
     514             : 
     515             : /*
     516             :  * Common
     517             :  */
     518             : /* Table of supported encodings, mapping a name to its callbacks in struct pg_encoding member order (encode_len, decode_len, encode, decode); the entry whose name is NULL marks the end. */
     519             : static const struct
     520             : {
     521             :     const char *name;
     522             :     struct pg_encoding enc;
     523             : }           enclist[] =
     524             : 
     525             : {
     526             :     {
     527             :         "hex",
     528             :         {
     529             :             hex_enc_len, hex_dec_len, hex_encode, hex_decode
     530             :         }
     531             :     },
     532             :     {
     533             :         "base64",
     534             :         {
     535             :             b64_enc_len, b64_dec_len, b64_encode, b64_decode
     536             :         }
     537             :     },
     538             :     {
     539             :         "escape",
     540             :         {
     541             :             esc_enc_len, esc_dec_len, esc_encode, esc_decode
     542             :         }
     543             :     },
     544             :     {
     545             :         NULL,   /* terminator: pg_find_encoding stops at the first NULL name */
     546             :         {
     547             :             NULL, NULL, NULL, NULL
     548             :         }
     549             :     }
     550             : };
     551             : /* Return a pointer to the callbacks of the first enclist entry whose name compares equal to `name` under pg_strcasecmp, or NULL when no entry matches. */
     552             : static const struct pg_encoding *
     553          11 : pg_find_encoding(const char *name)
     554             : {
     555             :     int         i;
     556             : 
     557          25 :     for (i = 0; enclist[i].name; i++)   /* linear scan up to the NULL-name terminator */
     558          25 :         if (pg_strcasecmp(enclist[i].name, name) == 0)
     559          11 :             return &enclist[i].enc;
     560             : 
     561           0 :     return NULL;
     562             : }

Generated by: LCOV version 1.11