Line data Source code
1 : /*-----------------------------------------------------------------------
2 : * ascii.c
3 : * The PostgreSQL routine for string to ascii conversion.
4 : *
5 : * Portions Copyright (c) 1999-2017, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/utils/adt/ascii.c
9 : *
10 : *-----------------------------------------------------------------------
11 : */
12 : #include "postgres.h"
13 :
14 : #include "mb/pg_wchar.h"
15 : #include "utils/ascii.h"
16 : #include "utils/builtins.h"
17 :
18 : static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
19 : unsigned char *dest, int enc);
20 : static text *encode_to_ascii(text *data, int enc);
21 :
22 :
23 : /* ----------
24 : * to_ascii
25 : * ----------
26 : */
27 : static void
28 0 : pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
29 : {
30 : unsigned char *x;
31 : const unsigned char *ascii;
32 : int range;
33 :
34 : /*
35 : * relevant start for an encoding
36 : */
37 : #define RANGE_128 128
38 : #define RANGE_160 160
39 :
40 0 : if (enc == PG_LATIN1)
41 : {
42 : /*
43 : * ISO-8859-1 <range: 160 -- 255>
44 : */
45 0 : ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
46 0 : range = RANGE_160;
47 : }
48 0 : else if (enc == PG_LATIN2)
49 : {
50 : /*
51 : * ISO-8859-2 <range: 160 -- 255>
52 : */
53 0 : ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
54 0 : range = RANGE_160;
55 : }
56 0 : else if (enc == PG_LATIN9)
57 : {
58 : /*
59 : * ISO-8859-15 <range: 160 -- 255>
60 : */
61 0 : ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
62 0 : range = RANGE_160;
63 : }
64 0 : else if (enc == PG_WIN1250)
65 : {
66 : /*
67 : * Window CP1250 <range: 128 -- 255>
68 : */
69 0 : ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
70 0 : range = RANGE_128;
71 : }
72 : else
73 : {
74 0 : ereport(ERROR,
75 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
76 : errmsg("encoding conversion from %s to ASCII not supported",
77 : pg_encoding_to_char(enc))));
78 0 : return; /* keep compiler quiet */
79 : }
80 :
81 : /*
82 : * Encode
83 : */
84 0 : for (x = src; x < src_end; x++)
85 : {
86 0 : if (*x < 128)
87 0 : *dest++ = *x;
88 0 : else if (*x < range)
89 0 : *dest++ = ' '; /* bogus 128 to 'range' */
90 : else
91 0 : *dest++ = ascii[*x - range];
92 : }
93 : }
94 :
95 : /* ----------
96 : * encode text
97 : *
98 : * The text datum is overwritten in-place, therefore this coding method
99 : * cannot support conversions that change the string length!
100 : * ----------
101 : */
102 : static text *
103 0 : encode_to_ascii(text *data, int enc)
104 : {
105 0 : pg_to_ascii((unsigned char *) VARDATA(data), /* src */
106 0 : (unsigned char *) (data) + VARSIZE(data), /* src end */
107 0 : (unsigned char *) VARDATA(data), /* dest */
108 : enc); /* encoding */
109 :
110 0 : return data;
111 : }
112 :
113 : /* ----------
114 : * convert to ASCII - enc is set as 'name' arg.
115 : * ----------
116 : */
117 : Datum
118 0 : to_ascii_encname(PG_FUNCTION_ARGS)
119 : {
120 0 : text *data = PG_GETARG_TEXT_P_COPY(0);
121 0 : char *encname = NameStr(*PG_GETARG_NAME(1));
122 0 : int enc = pg_char_to_encoding(encname);
123 :
124 0 : if (enc < 0)
125 0 : ereport(ERROR,
126 : (errcode(ERRCODE_UNDEFINED_OBJECT),
127 : errmsg("%s is not a valid encoding name", encname)));
128 :
129 0 : PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
130 : }
131 :
132 : /* ----------
133 : * convert to ASCII - enc is set as int4
134 : * ----------
135 : */
136 : Datum
137 0 : to_ascii_enc(PG_FUNCTION_ARGS)
138 : {
139 0 : text *data = PG_GETARG_TEXT_P_COPY(0);
140 0 : int enc = PG_GETARG_INT32(1);
141 :
142 0 : if (!PG_VALID_ENCODING(enc))
143 0 : ereport(ERROR,
144 : (errcode(ERRCODE_UNDEFINED_OBJECT),
145 : errmsg("%d is not a valid encoding code", enc)));
146 :
147 0 : PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
148 : }
149 :
150 : /* ----------
151 : * convert to ASCII - current enc is DatabaseEncoding
152 : * ----------
153 : */
154 : Datum
155 0 : to_ascii_default(PG_FUNCTION_ARGS)
156 : {
157 0 : text *data = PG_GETARG_TEXT_P_COPY(0);
158 0 : int enc = GetDatabaseEncoding();
159 :
160 0 : PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
161 : }
162 :
/* ----------
 * ascii_safe_strlcpy
 *
 * Copy a string in an arbitrary backend-safe encoding, converting it to a
 * clean ASCII string: printable ASCII characters (codes 32 .. 126) as well
 * as newline, carriage return and tab are kept, and every other byte --
 * other control characters, DEL (127), and all bytes with the high bit
 * set -- is replaced with '?'.
 *
 * Otherwise the behavior is identical to strlcpy(): at most destsiz - 1
 * bytes are copied and, provided destsiz > 0, the result is always
 * NUL-terminated; except that we don't bother with a return value.
 *
 * This must not trigger ereport(ERROR), as it is called in postmaster.
 * ----------
 */
void
ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
{
	if (destsiz == 0)			/* corner case: no room for trailing nul */
		return;

	/* the pre-decrement reserves one byte for the terminating nul */
	while (--destsiz > 0)
	{
		/* use unsigned char here to avoid compiler warning */
		unsigned char ch = *src++;

		if (ch == '\0')
			break;
		/* Keep printable ASCII characters; 127 (DEL) is a control code */
		if (32 <= ch && ch <= 126)
			*dest = ch;
		/* White-space is also OK */
		else if (ch == '\n' || ch == '\r' || ch == '\t')
			*dest = ch;
		/* Everything else is replaced with '?' */
		else
			*dest = '?';
		dest++;
	}

	*dest = '\0';
}
|