Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * Utility functions for conversion procs.
4 : *
5 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
6 : * Portions Copyright (c) 1994, Regents of the University of California
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/mb/conv.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 : #include "mb/pg_wchar.h"
15 :
16 :
17 : /*
18 : * local2local: a generic single byte charset encoding
19 : * conversion between two ASCII-superset encodings.
20 : *
21 : * l points to the source string of length len
22 : * p is the output area (must be large enough!)
23 : * src_encoding is the PG identifier for the source encoding
24 : * dest_encoding is the PG identifier for the target encoding
25 : * tab holds conversion entries for the source charset
26 : * starting from 128 (0x80). each entry in the table holds the corresponding
27 : * code point for the target charset, or 0 if there is no equivalent code.
28 : */
29 : void
30 28 : local2local(const unsigned char *l,
31 : unsigned char *p,
32 : int len,
33 : int src_encoding,
34 : int dest_encoding,
35 : const unsigned char *tab)
36 : {
37 : unsigned char c1,
38 : c2;
39 :
40 98 : while (len > 0)
41 : {
42 42 : c1 = *l;
43 42 : if (c1 == 0)
44 0 : report_invalid_encoding(src_encoding, (const char *) l, len);
45 42 : if (!IS_HIGHBIT_SET(c1))
46 42 : *p++ = c1;
47 : else
48 : {
49 0 : c2 = tab[c1 - HIGHBIT];
50 0 : if (c2)
51 0 : *p++ = c2;
52 : else
53 0 : report_untranslatable_char(src_encoding, dest_encoding,
54 : (const char *) l, len);
55 : }
56 42 : l++;
57 42 : len--;
58 : }
59 28 : *p = '\0';
60 28 : }
61 :
/*
 * LATINn ---> MIC when the charset's local codes map directly to MIC
 *
 * l is the source string, len bytes long
 * p is the output area (the caller must make it large enough); the result
 *	 is NUL-terminated
 * lc is the mule character set id for the local encoding
 * encoding is the PG identifier for the local encoding
 *
 * ASCII bytes are copied as-is; every high-bit-set byte is emitted as the
 * two-byte sequence (lc, byte).  A zero byte in the input is passed to
 * report_invalid_encoding().
 */
void
latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding)
{
	for (; len > 0; l++, len--)
	{
		int			ch = *l;

		if (ch == 0)
			report_invalid_encoding(encoding, (const char *) l, len);

		/* non-ASCII bytes are prefixed with the charset id */
		if (IS_HIGHBIT_SET(ch))
			*p++ = lc;
		*p++ = ch;
	}
	*p = '\0';
}
89 :
90 : /*
91 : * MIC ---> LATINn when the charset's local codes map directly to MIC
92 : *
93 : * mic points to the source string of length len
94 : * p is the output area (must be large enough!)
95 : * lc is the mule character set id for the local encoding
96 : * encoding is the PG identifier for the local encoding
97 : */
98 : void
99 10 : mic2latin(const unsigned char *mic, unsigned char *p, int len,
100 : int lc, int encoding)
101 : {
102 : int c1;
103 :
104 35 : while (len > 0)
105 : {
106 15 : c1 = *mic;
107 15 : if (c1 == 0)
108 0 : report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
109 15 : if (!IS_HIGHBIT_SET(c1))
110 : {
111 : /* easy for ASCII */
112 15 : *p++ = c1;
113 15 : mic++;
114 15 : len--;
115 : }
116 : else
117 : {
118 0 : int l = pg_mic_mblen(mic);
119 :
120 0 : if (len < l)
121 0 : report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
122 : len);
123 0 : if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
124 0 : report_untranslatable_char(PG_MULE_INTERNAL, encoding,
125 : (const char *) mic, len);
126 0 : *p++ = mic[1];
127 0 : mic += 2;
128 0 : len -= 2;
129 : }
130 : }
131 10 : *p = '\0';
132 10 : }
133 :
134 :
135 : /*
136 : * ASCII ---> MIC
137 : *
138 : * While ordinarily SQL_ASCII encoding is forgiving of high-bit-set
139 : * characters, here we must take a hard line because we don't know
140 : * the appropriate MIC equivalent.
141 : */
142 : void
143 3 : pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
144 : {
145 : int c1;
146 :
147 6 : while (len > 0)
148 : {
149 0 : c1 = *l;
150 0 : if (c1 == 0 || IS_HIGHBIT_SET(c1))
151 0 : report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
152 0 : *p++ = c1;
153 0 : l++;
154 0 : len--;
155 : }
156 3 : *p = '\0';
157 3 : }
158 :
159 : /*
160 : * MIC ---> ASCII
161 : */
162 : void
163 2 : pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
164 : {
165 : int c1;
166 :
167 4 : while (len > 0)
168 : {
169 0 : c1 = *mic;
170 0 : if (c1 == 0 || IS_HIGHBIT_SET(c1))
171 0 : report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
172 : (const char *) mic, len);
173 0 : *p++ = c1;
174 0 : mic++;
175 0 : len--;
176 : }
177 2 : *p = '\0';
178 2 : }
179 :
180 : /*
181 : * latin2mic_with_table: a generic single byte charset encoding
182 : * conversion from a local charset to the mule internal code.
183 : *
184 : * l points to the source string of length len
185 : * p is the output area (must be large enough!)
186 : * lc is the mule character set id for the local encoding
187 : * encoding is the PG identifier for the local encoding
188 : * tab holds conversion entries for the local charset
189 : * starting from 128 (0x80). each entry in the table holds the corresponding
190 : * code point for the mule encoding, or 0 if there is no equivalent code.
191 : */
192 : void
193 8 : latin2mic_with_table(const unsigned char *l,
194 : unsigned char *p,
195 : int len,
196 : int lc,
197 : int encoding,
198 : const unsigned char *tab)
199 : {
200 : unsigned char c1,
201 : c2;
202 :
203 28 : while (len > 0)
204 : {
205 12 : c1 = *l;
206 12 : if (c1 == 0)
207 0 : report_invalid_encoding(encoding, (const char *) l, len);
208 12 : if (!IS_HIGHBIT_SET(c1))
209 12 : *p++ = c1;
210 : else
211 : {
212 0 : c2 = tab[c1 - HIGHBIT];
213 0 : if (c2)
214 : {
215 0 : *p++ = lc;
216 0 : *p++ = c2;
217 : }
218 : else
219 0 : report_untranslatable_char(encoding, PG_MULE_INTERNAL,
220 : (const char *) l, len);
221 : }
222 12 : l++;
223 12 : len--;
224 : }
225 8 : *p = '\0';
226 8 : }
227 :
228 : /*
229 : * mic2latin_with_table: a generic single byte charset encoding
230 : * conversion from the mule internal code to a local charset.
231 : *
232 : * mic points to the source string of length len
233 : * p is the output area (must be large enough!)
234 : * lc is the mule character set id for the local encoding
235 : * encoding is the PG identifier for the local encoding
236 : * tab holds conversion entries for the mule internal code's second byte,
237 : * starting from 128 (0x80). each entry in the table holds the corresponding
238 : * code point for the local charset, or 0 if there is no equivalent code.
239 : */
240 : void
241 8 : mic2latin_with_table(const unsigned char *mic,
242 : unsigned char *p,
243 : int len,
244 : int lc,
245 : int encoding,
246 : const unsigned char *tab)
247 : {
248 : unsigned char c1,
249 : c2;
250 :
251 28 : while (len > 0)
252 : {
253 12 : c1 = *mic;
254 12 : if (c1 == 0)
255 0 : report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
256 12 : if (!IS_HIGHBIT_SET(c1))
257 : {
258 : /* easy for ASCII */
259 12 : *p++ = c1;
260 12 : mic++;
261 12 : len--;
262 : }
263 : else
264 : {
265 0 : int l = pg_mic_mblen(mic);
266 :
267 0 : if (len < l)
268 0 : report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
269 : len);
270 0 : if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
271 0 : (c2 = tab[mic[1] - HIGHBIT]) == 0)
272 : {
273 0 : report_untranslatable_char(PG_MULE_INTERNAL, encoding,
274 : (const char *) mic, len);
275 : break; /* keep compiler quiet */
276 : }
277 0 : *p++ = c2;
278 0 : mic += 2;
279 0 : len -= 2;
280 : }
281 : }
282 8 : *p = '\0';
283 8 : }
284 :
285 : /*
286 : * comparison routine for bsearch()
287 : * this routine is intended for combined UTF8 -> local code
288 : */
289 : static int
290 0 : compare3(const void *p1, const void *p2)
291 : {
292 : uint32 s1,
293 : s2,
294 : d1,
295 : d2;
296 :
297 0 : s1 = *(const uint32 *) p1;
298 0 : s2 = *((const uint32 *) p1 + 1);
299 0 : d1 = ((const pg_utf_to_local_combined *) p2)->utf1;
300 0 : d2 = ((const pg_utf_to_local_combined *) p2)->utf2;
301 0 : return (s1 > d1 || (s1 == d1 && s2 > d2)) ? 1 : ((s1 == d1 && s2 == d2) ? 0 : -1);
302 : }
303 :
304 : /*
305 : * comparison routine for bsearch()
306 : * this routine is intended for local code -> combined UTF8
307 : */
308 : static int
309 0 : compare4(const void *p1, const void *p2)
310 : {
311 : uint32 v1,
312 : v2;
313 :
314 0 : v1 = *(const uint32 *) p1;
315 0 : v2 = ((const pg_local_to_utf_combined *) p2)->code;
316 0 : return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
317 : }
318 :
319 : /*
320 : * store 32bit character representation into multibyte stream
321 : */
322 : static inline unsigned char *
323 0 : store_coded_char(unsigned char *dest, uint32 code)
324 : {
325 0 : if (code & 0xff000000)
326 0 : *dest++ = code >> 24;
327 0 : if (code & 0x00ff0000)
328 0 : *dest++ = code >> 16;
329 0 : if (code & 0x0000ff00)
330 0 : *dest++ = code >> 8;
331 0 : if (code & 0x000000ff)
332 0 : *dest++ = code;
333 0 : return dest;
334 : }
335 :
336 : /*
337 : * Convert a character using a conversion radix tree.
338 : *
339 : * 'l' is the length of the input character in bytes, and b1-b4 are
340 : * the input character's bytes.
341 : */
342 : static inline uint32
343 0 : pg_mb_radix_conv(const pg_mb_radix_tree *rt,
344 : int l,
345 : unsigned char b1,
346 : unsigned char b2,
347 : unsigned char b3,
348 : unsigned char b4)
349 : {
350 0 : if (l == 4)
351 : {
352 : /* 4-byte code */
353 :
354 : /* check code validity */
355 0 : if (b1 < rt->b4_1_lower || b1 > rt->b4_1_upper ||
356 0 : b2 < rt->b4_2_lower || b2 > rt->b4_2_upper ||
357 0 : b3 < rt->b4_3_lower || b3 > rt->b4_3_upper ||
358 0 : b4 < rt->b4_4_lower || b4 > rt->b4_4_upper)
359 0 : return 0;
360 :
361 : /* perform lookup */
362 0 : if (rt->chars32)
363 : {
364 0 : uint32 idx = rt->b4root;
365 :
366 0 : idx = rt->chars32[b1 + idx - rt->b4_1_lower];
367 0 : idx = rt->chars32[b2 + idx - rt->b4_2_lower];
368 0 : idx = rt->chars32[b3 + idx - rt->b4_3_lower];
369 0 : return rt->chars32[b4 + idx - rt->b4_4_lower];
370 : }
371 : else
372 : {
373 0 : uint16 idx = rt->b4root;
374 :
375 0 : idx = rt->chars16[b1 + idx - rt->b4_1_lower];
376 0 : idx = rt->chars16[b2 + idx - rt->b4_2_lower];
377 0 : idx = rt->chars16[b3 + idx - rt->b4_3_lower];
378 0 : return rt->chars16[b4 + idx - rt->b4_4_lower];
379 : }
380 : }
381 0 : else if (l == 3)
382 : {
383 : /* 3-byte code */
384 :
385 : /* check code validity */
386 0 : if (b2 < rt->b3_1_lower || b2 > rt->b3_1_upper ||
387 0 : b3 < rt->b3_2_lower || b3 > rt->b3_2_upper ||
388 0 : b4 < rt->b3_3_lower || b4 > rt->b3_3_upper)
389 0 : return 0;
390 :
391 : /* perform lookup */
392 0 : if (rt->chars32)
393 : {
394 0 : uint32 idx = rt->b3root;
395 :
396 0 : idx = rt->chars32[b2 + idx - rt->b3_1_lower];
397 0 : idx = rt->chars32[b3 + idx - rt->b3_2_lower];
398 0 : return rt->chars32[b4 + idx - rt->b3_3_lower];
399 : }
400 : else
401 : {
402 0 : uint16 idx = rt->b3root;
403 :
404 0 : idx = rt->chars16[b2 + idx - rt->b3_1_lower];
405 0 : idx = rt->chars16[b3 + idx - rt->b3_2_lower];
406 0 : return rt->chars16[b4 + idx - rt->b3_3_lower];
407 : }
408 : }
409 0 : else if (l == 2)
410 : {
411 : /* 2-byte code */
412 :
413 : /* check code validity - first byte */
414 0 : if (b3 < rt->b2_1_lower || b3 > rt->b2_1_upper ||
415 0 : b4 < rt->b2_2_lower || b4 > rt->b2_2_upper)
416 0 : return 0;
417 :
418 : /* perform lookup */
419 0 : if (rt->chars32)
420 : {
421 0 : uint32 idx = rt->b2root;
422 :
423 0 : idx = rt->chars32[b3 + idx - rt->b2_1_lower];
424 0 : return rt->chars32[b4 + idx - rt->b2_2_lower];
425 : }
426 : else
427 : {
428 0 : uint16 idx = rt->b2root;
429 :
430 0 : idx = rt->chars16[b3 + idx - rt->b2_1_lower];
431 0 : return rt->chars16[b4 + idx - rt->b2_2_lower];
432 : }
433 : }
434 0 : else if (l == 1)
435 : {
436 : /* 1-byte code */
437 :
438 : /* check code validity - first byte */
439 0 : if (b4 < rt->b1_lower || b4 > rt->b1_upper)
440 0 : return 0;
441 :
442 : /* perform lookup */
443 0 : if (rt->chars32)
444 0 : return rt->chars32[b4 + rt->b1root - rt->b1_lower];
445 : else
446 0 : return rt->chars16[b4 + rt->b1root - rt->b1_lower];
447 : }
448 0 : return 0; /* shouldn't happen */
449 : }
450 :
451 : /*
452 : * UTF8 ---> local code
453 : *
454 : * utf: input string in UTF8 encoding (need not be null-terminated)
455 : * len: length of input string (in bytes)
456 : * iso: pointer to the output area (must be large enough!)
457 : (output string will be null-terminated)
458 : * map: conversion map for single characters
459 : * cmap: conversion map for combined characters
460 : * (optional, pass NULL if none)
461 : * cmapsize: number of entries in the conversion map for combined characters
462 : * (optional, pass 0 if none)
463 : * conv_func: algorithmic encoding conversion function
464 : * (optional, pass NULL if none)
465 : * encoding: PG identifier for the local encoding
466 : *
467 : * For each character, the cmap (if provided) is consulted first; if no match,
468 : * the map is consulted next; if still no match, the conv_func (if provided)
469 : * is applied. An error is raised if no match is found.
470 : *
471 : * See pg_wchar.h for more details about the data structures used here.
472 : */
473 : void
474 76 : UtfToLocal(const unsigned char *utf, int len,
475 : unsigned char *iso,
476 : const pg_mb_radix_tree *map,
477 : const pg_utf_to_local_combined *cmap, int cmapsize,
478 : utf_local_conversion_func conv_func,
479 : int encoding)
480 : {
481 : uint32 iutf;
482 : int l;
483 : const pg_utf_to_local_combined *cp;
484 :
485 76 : if (!PG_VALID_ENCODING(encoding))
486 0 : ereport(ERROR,
487 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
488 : errmsg("invalid encoding number: %d", encoding)));
489 :
490 190 : for (; len > 0; len -= l)
491 : {
492 114 : unsigned char b1 = 0;
493 114 : unsigned char b2 = 0;
494 114 : unsigned char b3 = 0;
495 114 : unsigned char b4 = 0;
496 :
497 : /* "break" cases all represent errors */
498 114 : if (*utf == '\0')
499 0 : break;
500 :
501 114 : l = pg_utf_mblen(utf);
502 114 : if (len < l)
503 0 : break;
504 :
505 114 : if (!pg_utf8_islegal(utf, l))
506 0 : break;
507 :
508 114 : if (l == 1)
509 : {
510 : /* ASCII case is easy, assume it's one-to-one conversion */
511 114 : *iso++ = *utf++;
512 114 : continue;
513 : }
514 :
515 : /* collect coded char of length l */
516 0 : if (l == 2)
517 : {
518 0 : b3 = *utf++;
519 0 : b4 = *utf++;
520 : }
521 0 : else if (l == 3)
522 : {
523 0 : b2 = *utf++;
524 0 : b3 = *utf++;
525 0 : b4 = *utf++;
526 : }
527 0 : else if (l == 4)
528 : {
529 0 : b1 = *utf++;
530 0 : b2 = *utf++;
531 0 : b3 = *utf++;
532 0 : b4 = *utf++;
533 : }
534 : else
535 : {
536 0 : elog(ERROR, "unsupported character length %d", l);
537 : iutf = 0; /* keep compiler quiet */
538 : }
539 0 : iutf = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
540 :
541 : /* First, try with combined map if possible */
542 0 : if (cmap && len > l)
543 : {
544 0 : const unsigned char *utf_save = utf;
545 0 : int len_save = len;
546 0 : int l_save = l;
547 :
548 : /* collect next character, same as above */
549 0 : len -= l;
550 :
551 0 : l = pg_utf_mblen(utf);
552 0 : if (len < l)
553 0 : break;
554 :
555 0 : if (!pg_utf8_islegal(utf, l))
556 0 : break;
557 :
558 : /* We assume ASCII character cannot be in combined map */
559 0 : if (l > 1)
560 : {
561 : uint32 iutf2;
562 : uint32 cutf[2];
563 :
564 0 : if (l == 2)
565 : {
566 0 : iutf2 = *utf++ << 8;
567 0 : iutf2 |= *utf++;
568 : }
569 0 : else if (l == 3)
570 : {
571 0 : iutf2 = *utf++ << 16;
572 0 : iutf2 |= *utf++ << 8;
573 0 : iutf2 |= *utf++;
574 : }
575 0 : else if (l == 4)
576 : {
577 0 : iutf2 = *utf++ << 24;
578 0 : iutf2 |= *utf++ << 16;
579 0 : iutf2 |= *utf++ << 8;
580 0 : iutf2 |= *utf++;
581 : }
582 : else
583 : {
584 0 : elog(ERROR, "unsupported character length %d", l);
585 : iutf2 = 0; /* keep compiler quiet */
586 : }
587 :
588 0 : cutf[0] = iutf;
589 0 : cutf[1] = iutf2;
590 :
591 0 : cp = bsearch(cutf, cmap, cmapsize,
592 : sizeof(pg_utf_to_local_combined), compare3);
593 :
594 0 : if (cp)
595 : {
596 0 : iso = store_coded_char(iso, cp->code);
597 0 : continue;
598 : }
599 : }
600 :
601 : /* fail, so back up to reprocess second character next time */
602 0 : utf = utf_save;
603 0 : len = len_save;
604 0 : l = l_save;
605 : }
606 :
607 : /* Now check ordinary map */
608 0 : if (map)
609 : {
610 0 : uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
611 :
612 0 : if (converted)
613 : {
614 0 : iso = store_coded_char(iso, converted);
615 0 : continue;
616 : }
617 : }
618 :
619 : /* if there's a conversion function, try that */
620 0 : if (conv_func)
621 : {
622 0 : uint32 converted = (*conv_func) (iutf);
623 :
624 0 : if (converted)
625 : {
626 0 : iso = store_coded_char(iso, converted);
627 0 : continue;
628 : }
629 : }
630 :
631 : /* failed to translate this character */
632 0 : report_untranslatable_char(PG_UTF8, encoding,
633 0 : (const char *) (utf - l), len);
634 : }
635 :
636 : /* if we broke out of loop early, must be invalid input */
637 76 : if (len > 0)
638 0 : report_invalid_encoding(PG_UTF8, (const char *) utf, len);
639 :
640 76 : *iso = '\0';
641 76 : }
642 :
643 : /*
644 : * local code ---> UTF8
645 : *
646 : * iso: input string in local encoding (need not be null-terminated)
647 : * len: length of input string (in bytes)
648 : * utf: pointer to the output area (must be large enough!)
649 : (output string will be null-terminated)
650 : * map: conversion map for single characters
651 : * cmap: conversion map for combined characters
652 : * (optional, pass NULL if none)
653 : * cmapsize: number of entries in the conversion map for combined characters
654 : * (optional, pass 0 if none)
655 : * conv_func: algorithmic encoding conversion function
656 : * (optional, pass NULL if none)
657 : * encoding: PG identifier for the local encoding
658 : *
659 : * For each character, the map is consulted first; if no match, the cmap
660 : * (if provided) is consulted next; if still no match, the conv_func
661 : * (if provided) is applied. An error is raised if no match is found.
662 : *
663 : * See pg_wchar.h for more details about the data structures used here.
664 : */
665 : void
666 76 : LocalToUtf(const unsigned char *iso, int len,
667 : unsigned char *utf,
668 : const pg_mb_radix_tree *map,
669 : const pg_local_to_utf_combined *cmap, int cmapsize,
670 : utf_local_conversion_func conv_func,
671 : int encoding)
672 : {
673 : uint32 iiso;
674 : int l;
675 : const pg_local_to_utf_combined *cp;
676 :
677 76 : if (!PG_VALID_ENCODING(encoding))
678 0 : ereport(ERROR,
679 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
680 : errmsg("invalid encoding number: %d", encoding)));
681 :
682 190 : for (; len > 0; len -= l)
683 : {
684 114 : unsigned char b1 = 0;
685 114 : unsigned char b2 = 0;
686 114 : unsigned char b3 = 0;
687 114 : unsigned char b4 = 0;
688 :
689 : /* "break" cases all represent errors */
690 114 : if (*iso == '\0')
691 0 : break;
692 :
693 114 : if (!IS_HIGHBIT_SET(*iso))
694 : {
695 : /* ASCII case is easy, assume it's one-to-one conversion */
696 114 : *utf++ = *iso++;
697 114 : l = 1;
698 114 : continue;
699 : }
700 :
701 0 : l = pg_encoding_verifymb(encoding, (const char *) iso, len);
702 0 : if (l < 0)
703 0 : break;
704 :
705 : /* collect coded char of length l */
706 0 : if (l == 1)
707 0 : b4 = *iso++;
708 0 : else if (l == 2)
709 : {
710 0 : b3 = *iso++;
711 0 : b4 = *iso++;
712 : }
713 0 : else if (l == 3)
714 : {
715 0 : b2 = *iso++;
716 0 : b3 = *iso++;
717 0 : b4 = *iso++;
718 : }
719 0 : else if (l == 4)
720 : {
721 0 : b1 = *iso++;
722 0 : b2 = *iso++;
723 0 : b3 = *iso++;
724 0 : b4 = *iso++;
725 : }
726 : else
727 : {
728 0 : elog(ERROR, "unsupported character length %d", l);
729 : iiso = 0; /* keep compiler quiet */
730 : }
731 0 : iiso = (b1 << 24 | b2 << 16 | b3 << 8 | b4);
732 :
733 0 : if (map)
734 : {
735 0 : uint32 converted = pg_mb_radix_conv(map, l, b1, b2, b3, b4);
736 :
737 0 : if (converted)
738 : {
739 0 : utf = store_coded_char(utf, converted);
740 0 : continue;
741 : }
742 :
743 : /* If there's a combined character map, try that */
744 0 : if (cmap)
745 : {
746 0 : cp = bsearch(&iiso, cmap, cmapsize,
747 : sizeof(pg_local_to_utf_combined), compare4);
748 :
749 0 : if (cp)
750 : {
751 0 : utf = store_coded_char(utf, cp->utf1);
752 0 : utf = store_coded_char(utf, cp->utf2);
753 0 : continue;
754 : }
755 : }
756 : }
757 :
758 : /* if there's a conversion function, try that */
759 0 : if (conv_func)
760 : {
761 0 : uint32 converted = (*conv_func) (iiso);
762 :
763 0 : if (converted)
764 : {
765 0 : utf = store_coded_char(utf, converted);
766 0 : continue;
767 : }
768 : }
769 :
770 : /* failed to translate this character */
771 0 : report_untranslatable_char(encoding, PG_UTF8,
772 0 : (const char *) (iso - l), len);
773 : }
774 :
775 : /* if we broke out of loop early, must be invalid input */
776 76 : if (len > 0)
777 0 : report_invalid_encoding(encoding, (const char *) iso, len);
778 :
779 76 : *utf = '\0';
780 76 : }
|