Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ISO8859_1 <--> UTF8
4 : *
5 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
6 : * Portions Copyright (c) 1994, Regents of the University of California
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 : #include "fmgr.h"
16 : #include "mb/pg_wchar.h"
17 :
18 10 : PG_MODULE_MAGIC;
19 :
20 12 : PG_FUNCTION_INFO_V1(iso8859_1_to_utf8);
21 6 : PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);
22 :
23 : /* ----------
24 : * conv_proc(
25 : * INTEGER, -- source encoding id
26 : * INTEGER, -- destination encoding id
27 : * CSTRING, -- source string (null terminated C string)
28 : * CSTRING, -- destination string (null terminated C string)
29 : * INTEGER -- source string length
30 : * ) returns VOID;
31 : * ----------
32 : */
33 :
34 : Datum
35 22 : iso8859_1_to_utf8(PG_FUNCTION_ARGS)
36 : {
37 22 : unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
38 22 : unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
39 22 : int len = PG_GETARG_INT32(4);
40 : unsigned short c;
41 :
42 22 : CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
43 :
44 50 : while (len > 0)
45 : {
46 6 : c = *src;
47 6 : if (c == 0)
48 0 : report_invalid_encoding(PG_LATIN1, (const char *) src, len);
49 6 : if (!IS_HIGHBIT_SET(c))
50 6 : *dest++ = c;
51 : else
52 : {
53 0 : *dest++ = (c >> 6) | 0xc0;
54 0 : *dest++ = (c & 0x003f) | HIGHBIT;
55 : }
56 6 : src++;
57 6 : len--;
58 : }
59 22 : *dest = '\0';
60 :
61 22 : PG_RETURN_VOID();
62 : }
63 :
64 : Datum
65 4 : utf8_to_iso8859_1(PG_FUNCTION_ARGS)
66 : {
67 4 : unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
68 4 : unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
69 4 : int len = PG_GETARG_INT32(4);
70 : unsigned short c,
71 : c1;
72 :
73 4 : CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_LATIN1);
74 :
75 4 : while (len > 0)
76 : {
77 6 : c = *src;
78 6 : if (c == 0)
79 0 : report_invalid_encoding(PG_UTF8, (const char *) src, len);
80 : /* fast path for ASCII-subset characters */
81 6 : if (!IS_HIGHBIT_SET(c))
82 : {
83 6 : *dest++ = c;
84 6 : src++;
85 6 : len--;
86 : }
87 : else
88 : {
89 0 : int l = pg_utf_mblen(src);
90 :
91 0 : if (l > len || !pg_utf8_islegal(src, l))
92 0 : report_invalid_encoding(PG_UTF8, (const char *) src, len);
93 0 : if (l != 2)
94 0 : report_untranslatable_char(PG_UTF8, PG_LATIN1,
95 : (const char *) src, len);
96 0 : c1 = src[1] & 0x3f;
97 0 : c = ((c & 0x1f) << 6) | c1;
98 0 : if (c >= 0x80 && c <= 0xff)
99 : {
100 0 : *dest++ = (unsigned char) c;
101 0 : src += 2;
102 0 : len -= 2;
103 : }
104 : else
105 0 : report_untranslatable_char(PG_UTF8, PG_LATIN1,
106 : (const char *) src, len);
107 : }
108 : }
109 4 : *dest = '\0';
110 :
111 4 : PG_RETURN_VOID();
112 : }
|