Line data Source code
1 : /*-----------------------------------------------------------------------
2 : *
3 : * PostgreSQL locale utilities
4 : *
5 : * Portions Copyright (c) 2002-2017, PostgreSQL Global Development Group
6 : *
7 : * src/backend/utils/adt/pg_locale.c
8 : *
9 : *-----------------------------------------------------------------------
10 : */
11 :
12 : /*----------
13 : * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 : * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 : * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 : * toupper(), etc. are always in the same fixed locale.
17 : *
18 : * LC_MESSAGES is settable at run time and will take effect
19 : * immediately.
20 : *
21 : * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 : * settable at run-time. However, we don't actually set those locale
23 : * categories permanently. This would have bizarre effects like no
24 : * longer accepting standard floating-point literals in some locales.
25 : * Instead, we only set the locales briefly when needed, cache the
26 : * required information obtained from localeconv(), and set them back.
27 : * The cached information is only used by the formatting functions
28 : * (to_char, etc.) and the money type. For the user, this should all be
29 : * transparent.
30 : *
31 : * !!! NOW HEAR THIS !!!
32 : *
33 : * We've been bitten repeatedly by this bug, so let's try to keep it in
34 : * mind in future: on some platforms, the locale functions return pointers
35 : * to static data that will be overwritten by any later locale function.
36 : * Thus, for example, the obvious-looking sequence
37 : * save = setlocale(category, NULL);
38 : * if (!setlocale(category, value))
39 : * fail = true;
40 : * setlocale(category, save);
41 : * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
42 : * will change the memory save is pointing at. To do this sort of thing
43 : * safely, you *must* pstrdup what setlocale returns the first time.
44 : *
45 : * FYI, The Open Group locale standard is defined here:
46 : *
47 : * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
48 : *----------
49 : */
50 :
51 :
52 : #include "postgres.h"
53 :
54 : #include <time.h>
55 :
56 : #include "access/htup_details.h"
57 : #include "catalog/pg_collation.h"
58 : #include "catalog/pg_control.h"
59 : #include "mb/pg_wchar.h"
60 : #include "utils/builtins.h"
61 : #include "utils/hsearch.h"
62 : #include "utils/lsyscache.h"
63 : #include "utils/memutils.h"
64 : #include "utils/pg_locale.h"
65 : #include "utils/syscache.h"
66 :
67 : #ifdef USE_ICU
68 : #include <unicode/ucnv.h>
69 : #endif
70 :
#ifdef WIN32
/*
 * <shlwapi.h> defines its own StrNCpy macro.  We don't need that one here,
 * so we undefine ours before the include to keep the compiler quiet, and
 * undefine the Windows definition again afterwards so we don't accidentally
 * use theirs.
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
83 :
84 : #define MAX_L10N_DATA 80
85 :
86 :
87 : /* GUC settings */
88 : char *locale_messages;
89 : char *locale_monetary;
90 : char *locale_numeric;
91 : char *locale_time;
92 :
93 : /* lc_time localization cache */
94 : char *localized_abbrev_days[7];
95 : char *localized_full_days[7];
96 : char *localized_abbrev_months[12];
97 : char *localized_full_months[12];
98 :
99 : /* indicates whether locale information cache is valid */
100 : static bool CurrentLocaleConvValid = false;
101 : static bool CurrentLCTimeValid = false;
102 :
103 : /* Environment variable storage area */
104 :
105 : #define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
106 :
107 : static char lc_collate_envbuf[LC_ENV_BUFSIZE];
108 : static char lc_ctype_envbuf[LC_ENV_BUFSIZE];
109 :
110 : #ifdef LC_MESSAGES
111 : static char lc_messages_envbuf[LC_ENV_BUFSIZE];
112 : #endif
113 : static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
114 : static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
115 : static char lc_time_envbuf[LC_ENV_BUFSIZE];
116 :
117 : /* Cache for collation-related knowledge */
118 :
119 : typedef struct
120 : {
121 : Oid collid; /* hash key: pg_collation OID */
122 : bool collate_is_c; /* is collation's LC_COLLATE C? */
123 : bool ctype_is_c; /* is collation's LC_CTYPE C? */
124 : bool flags_valid; /* true if above flags are valid */
125 : pg_locale_t locale; /* locale_t struct, or 0 if not valid */
126 : } collation_cache_entry;
127 :
128 : static HTAB *collation_cache = NULL;
129 :
130 :
#if defined(WIN32) && defined(LC_MESSAGES)
/* MSVC specific: map a Windows locale name to a Unix-style one */
static char *IsoLocaleName(const char *winlocname);
#endif
134 :
135 :
136 : /*
137 : * pg_perm_setlocale
138 : *
139 : * This wraps the libc function setlocale(), with two additions. First, when
140 : * changing LC_CTYPE, update gettext's encoding for the current message
141 : * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
142 : * not on Windows. Second, if the operation is successful, the corresponding
143 : * LC_XXX environment variable is set to match. By setting the environment
144 : * variable, we ensure that any subsequent use of setlocale(..., "") will
145 : * preserve the settings made through this routine. Of course, LC_ALL must
146 : * also be unset to fully ensure that, but that has to be done elsewhere after
147 : * all the individual LC_XXX variables have been set correctly. (Thank you
148 : * Perl for making this kluge necessary.)
149 : */
150 : char *
151 1273 : pg_perm_setlocale(int category, const char *locale)
152 : {
153 : char *result;
154 : const char *envvar;
155 : char *envbuf;
156 :
157 : #ifndef WIN32
158 1273 : result = setlocale(category, locale);
159 : #else
160 :
161 : /*
162 : * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
163 : * the given value is good and set it in the environment variables. We
164 : * must ignore attempts to set to "", which means "keep using the old
165 : * environment value".
166 : */
167 : #ifdef LC_MESSAGES
168 : if (category == LC_MESSAGES)
169 : {
170 : result = (char *) locale;
171 : if (locale == NULL || locale[0] == '\0')
172 : return result;
173 : }
174 : else
175 : #endif
176 : result = setlocale(category, locale);
177 : #endif /* WIN32 */
178 :
179 1273 : if (result == NULL)
180 0 : return result; /* fall out immediately on failure */
181 :
182 : /*
183 : * Use the right encoding in translated messages. Under ENABLE_NLS, let
184 : * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
185 : * format strings are ASCII, but database-encoding strings may enter the
186 : * message via %s. This makes the overall message encoding equal to the
187 : * database encoding.
188 : */
189 1273 : if (category == LC_CTYPE)
190 : {
191 : static char save_lc_ctype[LC_ENV_BUFSIZE];
192 :
193 : /* copy setlocale() return value before callee invokes it again */
194 341 : strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
195 341 : result = save_lc_ctype;
196 :
197 : #ifdef ENABLE_NLS
198 : SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
199 : #else
200 341 : SetMessageEncoding(GetDatabaseEncoding());
201 : #endif
202 : }
203 :
204 1273 : switch (category)
205 : {
206 : case LC_COLLATE:
207 341 : envvar = "LC_COLLATE";
208 341 : envbuf = lc_collate_envbuf;
209 341 : break;
210 : case LC_CTYPE:
211 341 : envvar = "LC_CTYPE";
212 341 : envbuf = lc_ctype_envbuf;
213 341 : break;
214 : #ifdef LC_MESSAGES
215 : case LC_MESSAGES:
216 573 : envvar = "LC_MESSAGES";
217 573 : envbuf = lc_messages_envbuf;
218 : #ifdef WIN32
219 : result = IsoLocaleName(locale);
220 : if (result == NULL)
221 : result = (char *) locale;
222 : #endif /* WIN32 */
223 573 : break;
224 : #endif /* LC_MESSAGES */
225 : case LC_MONETARY:
226 6 : envvar = "LC_MONETARY";
227 6 : envbuf = lc_monetary_envbuf;
228 6 : break;
229 : case LC_NUMERIC:
230 6 : envvar = "LC_NUMERIC";
231 6 : envbuf = lc_numeric_envbuf;
232 6 : break;
233 : case LC_TIME:
234 6 : envvar = "LC_TIME";
235 6 : envbuf = lc_time_envbuf;
236 6 : break;
237 : default:
238 0 : elog(FATAL, "unrecognized LC category: %d", category);
239 : envvar = NULL; /* keep compiler quiet */
240 : envbuf = NULL;
241 : return NULL;
242 : }
243 :
244 1273 : snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
245 :
246 1273 : if (putenv(envbuf))
247 0 : return NULL;
248 :
249 1273 : return result;
250 : }
251 :
252 :
253 : /*
254 : * Is the locale name valid for the locale category?
255 : *
256 : * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
257 : * canonical name is stored there. This is especially useful for figuring out
258 : * what locale name "" means (ie, the server environment value). (Actually,
259 : * it seems that on most implementations that's the only thing it's good for;
260 : * we could wish that setlocale gave back a canonically spelled version of
261 : * the locale name, but typically it doesn't.)
262 : */
263 : bool
264 2162 : check_locale(int category, const char *locale, char **canonname)
265 : {
266 : char *save;
267 : char *res;
268 :
269 2162 : if (canonname)
270 6 : *canonname = NULL; /* in case of failure */
271 :
272 2162 : save = setlocale(category, NULL);
273 2162 : if (!save)
274 0 : return false; /* won't happen, we hope */
275 :
276 : /* save may be pointing at a modifiable scratch variable, see above. */
277 2162 : save = pstrdup(save);
278 :
279 : /* set the locale with setlocale, to see if it accepts it. */
280 2162 : res = setlocale(category, locale);
281 :
282 : /* save canonical name if requested. */
283 2162 : if (res && canonname)
284 6 : *canonname = pstrdup(res);
285 :
286 : /* restore old value. */
287 2162 : if (!setlocale(category, save))
288 0 : elog(WARNING, "failed to restore old locale \"%s\"", save);
289 2162 : pfree(save);
290 :
291 2162 : return (res != NULL);
292 : }
293 :
294 :
295 : /*
296 : * GUC check/assign hooks
297 : *
298 : * For most locale categories, the assign hook doesn't actually set the locale
299 : * permanently, just reset flags so that the next use will cache the
300 : * appropriate values. (See explanation at the top of this file.)
301 : *
302 : * Note: we accept value = "" as selecting the postmaster's environment
303 : * value, whatever it was (so long as the environment setting is legal).
304 : * This will have been locked down by an earlier call to pg_perm_setlocale.
305 : */
306 : bool
307 569 : check_locale_monetary(char **newval, void **extra, GucSource source)
308 : {
309 569 : return check_locale(LC_MONETARY, *newval, NULL);
310 : }
311 :
312 : void
313 567 : assign_locale_monetary(const char *newval, void *extra)
314 : {
315 567 : CurrentLocaleConvValid = false;
316 567 : }
317 :
318 : bool
319 569 : check_locale_numeric(char **newval, void **extra, GucSource source)
320 : {
321 569 : return check_locale(LC_NUMERIC, *newval, NULL);
322 : }
323 :
324 : void
325 567 : assign_locale_numeric(const char *newval, void *extra)
326 : {
327 567 : CurrentLocaleConvValid = false;
328 567 : }
329 :
330 : bool
331 569 : check_locale_time(char **newval, void **extra, GucSource source)
332 : {
333 569 : return check_locale(LC_TIME, *newval, NULL);
334 : }
335 :
336 : void
337 567 : assign_locale_time(const char *newval, void *extra)
338 : {
339 567 : CurrentLCTimeValid = false;
340 567 : }
341 :
342 : /*
343 : * We allow LC_MESSAGES to actually be set globally.
344 : *
345 : * Note: we normally disallow value = "" because it wouldn't have consistent
346 : * semantics (it'd effectively just use the previous value). However, this
347 : * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
348 : * not even if the attempted setting fails due to invalid environment value.
349 : * The idea there is just to accept the environment setting *if possible*
350 : * during startup, until we can read the proper value from postgresql.conf.
351 : */
352 : bool
353 569 : check_locale_messages(char **newval, void **extra, GucSource source)
354 : {
355 569 : if (**newval == '\0')
356 : {
357 120 : if (source == PGC_S_DEFAULT)
358 120 : return true;
359 : else
360 0 : return false;
361 : }
362 :
363 : /*
364 : * LC_MESSAGES category does not exist everywhere, but accept it anyway
365 : *
366 : * On Windows, we can't even check the value, so accept blindly
367 : */
368 : #if defined(LC_MESSAGES) && !defined(WIN32)
369 449 : return check_locale(LC_MESSAGES, *newval, NULL);
370 : #else
371 : return true;
372 : #endif
373 : }
374 :
/*
 * GUC assign hook for the LC_MESSAGES setting.
 *
 * Unlike the monetary/numeric/time hooks, this one really installs the new
 * locale, via pg_perm_setlocale().
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * LC_MESSAGES category does not exist everywhere, but accept it anyway.
	 * A failed attempt is deliberately ignored: the value may be the
	 * startup-time default taken from a possibly-invalid environment.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
386 :
387 :
/*
 * Frees the malloced content of a struct lconv.  (But not the struct
 * itself.)  It's important that this not throw elog(ERROR).
 *
 * Only the string members that struct_lconv_is_valid() checks are released.
 * Each one is reset to NULL afterwards, so that calling this twice on the
 * same struct, or looking at the struct after it was freed, cannot touch
 * released memory.  Members that are already NULL are fine: free(NULL) is a
 * no-op.
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* numeric formatting */
	free(s->decimal_point);
	s->decimal_point = NULL;
	free(s->thousands_sep);
	s->thousands_sep = NULL;
	free(s->grouping);
	s->grouping = NULL;

	/* monetary formatting */
	free(s->int_curr_symbol);
	s->int_curr_symbol = NULL;
	free(s->currency_symbol);
	s->currency_symbol = NULL;
	free(s->mon_decimal_point);
	s->mon_decimal_point = NULL;
	free(s->mon_thousands_sep);
	s->mon_thousands_sep = NULL;
	free(s->mon_grouping);
	s->mon_grouping = NULL;
	free(s->positive_sign);
	s->positive_sign = NULL;
	free(s->negative_sign);
	s->negative_sign = NULL;
}
416 :
/*
 * Report whether every string member of a struct lconv that we care about
 * is non-NULL.  The set of members tested here must stay in step with the
 * set released by free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
446 :
447 :
448 : /*
449 : * Convert the strdup'd string at *str from the specified encoding to the
450 : * database encoding.
451 : */
452 : static void
453 56 : db_encoding_convert(int encoding, char **str)
454 : {
455 : char *pstr;
456 : char *mstr;
457 :
458 : /* convert the string to the database encoding */
459 56 : pstr = pg_any_to_server(*str, strlen(*str), encoding);
460 56 : if (pstr == *str)
461 112 : return; /* no conversion happened */
462 :
463 : /* need it malloc'd not palloc'd */
464 0 : mstr = strdup(pstr);
465 0 : if (mstr == NULL)
466 0 : ereport(ERROR,
467 : (errcode(ERRCODE_OUT_OF_MEMORY),
468 : errmsg("out of memory")));
469 :
470 : /* replace old string */
471 0 : free(*str);
472 0 : *str = mstr;
473 :
474 0 : pfree(pstr);
475 : }
476 :
477 :
478 : /*
479 : * Return the POSIX lconv struct (contains number/money formatting
480 : * information) with locale information for all categories.
481 : */
482 : struct lconv *
483 40345 : PGLC_localeconv(void)
484 : {
485 : static struct lconv CurrentLocaleConv;
486 : static bool CurrentLocaleConvAllocated = false;
487 : struct lconv *extlconv;
488 : struct lconv worklconv;
489 40345 : bool trouble = false;
490 : char *save_lc_monetary;
491 : char *save_lc_numeric;
492 : #ifdef WIN32
493 : char *save_lc_ctype;
494 : #endif
495 :
496 : /* Did we do it already? */
497 40345 : if (CurrentLocaleConvValid)
498 40338 : return &CurrentLocaleConv;
499 :
500 : /* Free any already-allocated storage */
501 7 : if (CurrentLocaleConvAllocated)
502 : {
503 0 : free_struct_lconv(&CurrentLocaleConv);
504 0 : CurrentLocaleConvAllocated = false;
505 : }
506 :
507 : /*
508 : * This is tricky because we really don't want to risk throwing error
509 : * while the locale is set to other than our usual settings. Therefore,
510 : * the process is: collect the usual settings, set locale to special
511 : * setting, copy relevant data into worklconv using strdup(), restore
512 : * normal settings, convert data to desired encoding, and finally stash
513 : * the collected data in CurrentLocaleConv. This makes it safe if we
514 : * throw an error during encoding conversion or run out of memory anywhere
515 : * in the process. All data pointed to by struct lconv members is
516 : * allocated with strdup, to avoid premature elog(ERROR) and to allow
517 : * using a single cleanup routine.
518 : */
519 7 : memset(&worklconv, 0, sizeof(worklconv));
520 :
521 : /* Save user's values of monetary and numeric locales */
522 7 : save_lc_monetary = setlocale(LC_MONETARY, NULL);
523 7 : if (save_lc_monetary)
524 7 : save_lc_monetary = pstrdup(save_lc_monetary);
525 :
526 7 : save_lc_numeric = setlocale(LC_NUMERIC, NULL);
527 7 : if (save_lc_numeric)
528 7 : save_lc_numeric = pstrdup(save_lc_numeric);
529 :
530 : #ifdef WIN32
531 :
532 : /*
533 : * Ideally, monetary and numeric local symbols could be returned in any
534 : * server encoding. Unfortunately, the WIN32 API does not allow
535 : * setlocale() to return values in a codepage/CTYPE that uses more than
536 : * two bytes per character, such as UTF-8:
537 : *
538 : * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
539 : *
540 : * Evidently, LC_CTYPE allows us to control the encoding used for strings
541 : * returned by localeconv(). The Open Group standard, mentioned at the
542 : * top of this C file, doesn't explicitly state this.
543 : *
544 : * Therefore, we set LC_CTYPE to match LC_NUMERIC or LC_MONETARY (which
545 : * cannot be UTF8), call localeconv(), and then convert from the
546 : * numeric/monetary LC_CTYPE to the server encoding. One example use of
547 : * this is for the Euro symbol.
548 : *
549 : * Perhaps someday we will use GetLocaleInfoW() which returns values in
550 : * UTF16 and convert from that.
551 : */
552 :
553 : /* save user's value of ctype locale */
554 : save_lc_ctype = setlocale(LC_CTYPE, NULL);
555 : if (save_lc_ctype)
556 : save_lc_ctype = pstrdup(save_lc_ctype);
557 :
558 : /* Here begins the critical section where we must not throw error */
559 :
560 : /* use numeric to set the ctype */
561 : setlocale(LC_CTYPE, locale_numeric);
562 : #endif
563 :
564 : /* Get formatting information for numeric */
565 7 : setlocale(LC_NUMERIC, locale_numeric);
566 7 : extlconv = localeconv();
567 :
568 : /* Must copy data now in case setlocale() overwrites it */
569 7 : worklconv.decimal_point = strdup(extlconv->decimal_point);
570 7 : worklconv.thousands_sep = strdup(extlconv->thousands_sep);
571 7 : worklconv.grouping = strdup(extlconv->grouping);
572 :
573 : #ifdef WIN32
574 : /* use monetary to set the ctype */
575 : setlocale(LC_CTYPE, locale_monetary);
576 : #endif
577 :
578 : /* Get formatting information for monetary */
579 7 : setlocale(LC_MONETARY, locale_monetary);
580 7 : extlconv = localeconv();
581 :
582 : /* Must copy data now in case setlocale() overwrites it */
583 7 : worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
584 7 : worklconv.currency_symbol = strdup(extlconv->currency_symbol);
585 7 : worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
586 7 : worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
587 7 : worklconv.mon_grouping = strdup(extlconv->mon_grouping);
588 7 : worklconv.positive_sign = strdup(extlconv->positive_sign);
589 7 : worklconv.negative_sign = strdup(extlconv->negative_sign);
590 : /* Copy scalar fields as well */
591 7 : worklconv.int_frac_digits = extlconv->int_frac_digits;
592 7 : worklconv.frac_digits = extlconv->frac_digits;
593 7 : worklconv.p_cs_precedes = extlconv->p_cs_precedes;
594 7 : worklconv.p_sep_by_space = extlconv->p_sep_by_space;
595 7 : worklconv.n_cs_precedes = extlconv->n_cs_precedes;
596 7 : worklconv.n_sep_by_space = extlconv->n_sep_by_space;
597 7 : worklconv.p_sign_posn = extlconv->p_sign_posn;
598 7 : worklconv.n_sign_posn = extlconv->n_sign_posn;
599 :
600 : /* Try to restore internal settings */
601 7 : if (save_lc_monetary)
602 : {
603 7 : if (!setlocale(LC_MONETARY, save_lc_monetary))
604 0 : trouble = true;
605 : }
606 :
607 7 : if (save_lc_numeric)
608 : {
609 7 : if (!setlocale(LC_NUMERIC, save_lc_numeric))
610 0 : trouble = true;
611 : }
612 :
613 : #ifdef WIN32
614 : /* Try to restore internal ctype settings */
615 : if (save_lc_ctype)
616 : {
617 : if (!setlocale(LC_CTYPE, save_lc_ctype))
618 : trouble = true;
619 : }
620 : #endif
621 :
622 : /*
623 : * At this point we've done our best to clean up, and can call functions
624 : * that might possibly throw errors with a clean conscience. But let's
625 : * make sure we don't leak any already-strdup'd fields in worklconv.
626 : */
627 7 : PG_TRY();
628 : {
629 : int encoding;
630 :
631 : /*
632 : * Report it if we failed to restore anything. Perhaps this should be
633 : * FATAL, rather than continuing with bad locale settings?
634 : */
635 7 : if (trouble)
636 0 : elog(WARNING, "failed to restore old locale");
637 :
638 : /* Release the pstrdup'd locale names */
639 7 : if (save_lc_monetary)
640 7 : pfree(save_lc_monetary);
641 7 : if (save_lc_numeric)
642 7 : pfree(save_lc_numeric);
643 : #ifdef WIN32
644 : if (save_lc_ctype)
645 : pfree(save_lc_ctype);
646 : #endif
647 :
648 : /* If any of the preceding strdup calls failed, complain now. */
649 7 : if (!struct_lconv_is_valid(&worklconv))
650 0 : ereport(ERROR,
651 : (errcode(ERRCODE_OUT_OF_MEMORY),
652 : errmsg("out of memory")));
653 :
654 : /*
655 : * Now we must perform encoding conversion from whatever's associated
656 : * with the locale into the database encoding.
657 : */
658 7 : encoding = pg_get_encoding_from_locale(locale_numeric, true);
659 :
660 7 : db_encoding_convert(encoding, &worklconv.decimal_point);
661 7 : db_encoding_convert(encoding, &worklconv.thousands_sep);
662 : /* grouping is not text and does not require conversion */
663 :
664 7 : encoding = pg_get_encoding_from_locale(locale_monetary, true);
665 :
666 7 : db_encoding_convert(encoding, &worklconv.int_curr_symbol);
667 7 : db_encoding_convert(encoding, &worklconv.currency_symbol);
668 7 : db_encoding_convert(encoding, &worklconv.mon_decimal_point);
669 7 : db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
670 : /* mon_grouping is not text and does not require conversion */
671 7 : db_encoding_convert(encoding, &worklconv.positive_sign);
672 7 : db_encoding_convert(encoding, &worklconv.negative_sign);
673 : }
674 0 : PG_CATCH();
675 : {
676 0 : free_struct_lconv(&worklconv);
677 0 : PG_RE_THROW();
678 : }
679 7 : PG_END_TRY();
680 :
681 : /*
682 : * Everything is good, so save the results.
683 : */
684 7 : CurrentLocaleConv = worklconv;
685 7 : CurrentLocaleConvAllocated = true;
686 7 : CurrentLocaleConvValid = true;
687 7 : return &CurrentLocaleConv;
688 : }
689 :
#ifdef WIN32
/*
 * On WIN32, strftime() returns the encoding in CP_ACP (the default
 * operating system codepage for that computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert it to the appropriate
 * database encoding.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * Returns the length of the string stored in dst, or 0 if wcsftime()
 * produced nothing (dst contents are then unspecified).  Raises ERROR if
 * either UTF-8 <-> wide-character conversion fails.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		nchars;
	wchar_t		wfmt[8];		/* formats used below need 3 bytes */
	wchar_t		wresult[MAX_L10N_DATA];
	char	   *recoded;

	/* get a wchar_t version of the format string */
	nchars = MultiByteToWideChar(CP_UTF8, 0, format, -1,
								 wfmt, lengthof(wfmt));
	if (nchars == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	/*
	 * A zero result means strftime failed, possibly because the result would
	 * not fit in MAX_L10N_DATA.  Return 0 with the contents of dst
	 * unspecified.
	 */
	nchars = wcsftime(wresult, MAX_L10N_DATA, wfmt, tm);
	if (nchars == 0)
		return 0;

	/* wide characters -> UTF-8, leaving room for the terminator */
	nchars = WideCharToMultiByte(CP_UTF8, 0, wresult, nchars, dst, dstlen - 1,
								 NULL, NULL);
	if (nchars == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	dst[nchars] = '\0';

	/* a UTF-8 database can use the result as it stands */
	if (GetDatabaseEncoding() == PG_UTF8)
		return nchars;

	/* otherwise convert from UTF-8 to the database encoding, in place */
	recoded = pg_any_to_server(dst, nchars, PG_UTF8);
	if (recoded != dst)
	{
		strlcpy(dst, recoded, dstlen);
		nchars = strlen(dst);
		pfree(recoded);
	}

	return nchars;
}

/* redefine strftime() */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
756 :
757 : /* Subroutine for cache_locale_time(). */
758 : static void
759 152 : cache_single_time(char **dst, const char *format, const struct tm *tm)
760 : {
761 : char buf[MAX_L10N_DATA];
762 : char *ptr;
763 :
764 : /*
765 : * MAX_L10N_DATA is sufficient buffer space for every known locale, and
766 : * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
767 : * error.) An implementation might report errors (e.g. ENOMEM) by
768 : * returning 0 (or, less plausibly, a negative value) and setting errno.
769 : * Report errno just in case the implementation did that, but clear it in
770 : * advance of the call so we don't emit a stale, unrelated errno.
771 : */
772 152 : errno = 0;
773 152 : if (strftime(buf, MAX_L10N_DATA, format, tm) <= 0)
774 0 : elog(ERROR, "strftime(%s) failed: %m", format);
775 :
776 152 : ptr = MemoryContextStrdup(TopMemoryContext, buf);
777 152 : if (*dst)
778 0 : pfree(*dst);
779 152 : *dst = ptr;
780 152 : }
781 :
782 : /*
783 : * Update the lc_time localization cache variables if needed.
784 : */
785 : void
786 1419 : cache_locale_time(void)
787 : {
788 : char *save_lc_time;
789 : time_t timenow;
790 : struct tm *timeinfo;
791 : int i;
792 :
793 : #ifdef WIN32
794 : char *save_lc_ctype;
795 : #endif
796 :
797 : /* did we do this already? */
798 1419 : if (CurrentLCTimeValid)
799 2834 : return;
800 :
801 4 : elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
802 :
803 : /* save user's value of time locale */
804 4 : save_lc_time = setlocale(LC_TIME, NULL);
805 4 : if (save_lc_time)
806 4 : save_lc_time = pstrdup(save_lc_time);
807 :
808 : #ifdef WIN32
809 :
810 : /*
811 : * On WIN32, there is no way to get locale-specific time values in a
812 : * specified locale, like we do for monetary/numeric. We can only get
813 : * CP_ACP (see strftime_win32) or UTF16. Therefore, we get UTF16 and
814 : * convert it to the database locale. However, wcsftime() internally uses
815 : * LC_CTYPE, so we set it here. See the WIN32 comment near the top of
816 : * PGLC_localeconv().
817 : */
818 :
819 : /* save user's value of ctype locale */
820 : save_lc_ctype = setlocale(LC_CTYPE, NULL);
821 : if (save_lc_ctype)
822 : save_lc_ctype = pstrdup(save_lc_ctype);
823 :
824 : /* use lc_time to set the ctype */
825 : setlocale(LC_CTYPE, locale_time);
826 : #endif
827 :
828 4 : setlocale(LC_TIME, locale_time);
829 :
830 4 : timenow = time(NULL);
831 4 : timeinfo = localtime(&timenow);
832 :
833 : /* localized days */
834 32 : for (i = 0; i < 7; i++)
835 : {
836 28 : timeinfo->tm_wday = i;
837 28 : cache_single_time(&localized_abbrev_days[i], "%a", timeinfo);
838 28 : cache_single_time(&localized_full_days[i], "%A", timeinfo);
839 : }
840 :
841 : /* localized months */
842 52 : for (i = 0; i < 12; i++)
843 : {
844 48 : timeinfo->tm_mon = i;
845 48 : timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
846 48 : cache_single_time(&localized_abbrev_months[i], "%b", timeinfo);
847 48 : cache_single_time(&localized_full_months[i], "%B", timeinfo);
848 : }
849 :
850 : /* try to restore internal settings */
851 4 : if (save_lc_time)
852 : {
853 4 : if (!setlocale(LC_TIME, save_lc_time))
854 0 : elog(WARNING, "failed to restore old locale");
855 4 : pfree(save_lc_time);
856 : }
857 :
858 : #ifdef WIN32
859 : /* try to restore internal ctype settings */
860 : if (save_lc_ctype)
861 : {
862 : if (!setlocale(LC_CTYPE, save_lc_ctype))
863 : elog(WARNING, "failed to restore old locale");
864 : pfree(save_lc_ctype);
865 : }
866 : #endif
867 :
868 4 : CurrentLCTimeValid = true;
869 : }
870 :
871 :
#if defined(WIN32) && defined(LC_MESSAGES)
/*
 * Convert a Windows setlocale() argument to a Unix-style one.
 *
 * Regardless of platform, we install message catalogs under a Unix-style
 * LL[_CC][.ENCODING][@VARIANT] naming convention.  Only LC_MESSAGES settings
 * following that style will elicit localized interface strings.
 *
 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
 * case-insensitive.  setlocale() returns the fully-qualified form; for
 * example, setlocale("thaI") returns "Thai_Thailand.874".  Internally,
 * setlocale() and _create_locale() select a "locale identifier"[1] and store
 * it in an undocumented _locale_t field.  From that LCID, we can retrieve the
 * ISO 639 language and the ISO 3166 country.  Character encoding does not
 * matter, because the server and client encodings govern that.
 *
 * Windows Vista introduced the "locale name" concept[2], closely following
 * RFC 4646.  Locale identifiers are now deprecated.  Starting with Visual
 * Studio 2012, setlocale() accepts locale names in addition to the strings it
 * accepted historically.  It does not standardize them; setlocale("Th-tH")
 * returns "Th-tH".  setlocale(category, "") still returns a traditional
 * string.  Furthermore, msvcr110.dll changed the undocumented _locale_t
 * content to carry locale names instead of locale identifiers.
 *
 * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol.
 * IsoLocaleName() always fails in a MinGW-built postgres.exe, so only
 * Unix-style values of the lc_messages GUC can elicit localized messages.  In
 * particular, every lc_messages setting that initdb can select automatically
 * will yield only C-locale messages.  XXX This could be fixed by running the
 * fully-qualified locale name through a lookup table.
 *
 * This function returns a pointer to a static buffer bearing the converted
 * name or NULL if conversion fails.
 *
 * [1] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373763.aspx
 * [2] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373814.aspx
 */
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)			/* VC8.0 or later */
	static char iso_lc_messages[32];
	_locale_t	loct = NULL;
#if (_MSC_VER >= 1700)			/* Visual Studio 2012 or later */
	size_t		rc;
	char	   *hyphen;
#else
	char		isolang[32],
				isocrty[32];
	LCID		lcid;
#endif

	/* "C" and "POSIX", in any letter case, both map to plain "C" */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

	loct = _create_locale(LC_CTYPE, winlocname);
	if (loct == NULL)
		return NULL;

#if (_MSC_VER >= 1700)			/* Visual Studio 2012 or later */
	/* Locale names use only ASCII, any conversion locale suffices. */
	rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
					sizeof(iso_lc_messages), NULL);
	_free_locale(loct);
	if (rc == -1 || rc == sizeof(iso_lc_messages))
		return NULL;

	/*
	 * Since the message catalogs sit on a case-insensitive filesystem, we
	 * need not standardize letter case here.  So long as we do not ship
	 * message catalogs for which it would matter, we also need not translate
	 * the script/variant portion, e.g. uz-Cyrl-UZ to uz_UZ@cyrillic.  Simply
	 * replace the hyphen with an underscore.
	 *
	 * Note that the locale name can be less-specific than the value we would
	 * derive under earlier Visual Studio releases.  For example,
	 * French_France.1252 yields just "fr".  This does not affect any of the
	 * country-specific message catalogs available as of this writing (pt_BR,
	 * zh_CN, zh_TW).
	 */
	hyphen = strchr(iso_lc_messages, '-');
	if (hyphen)
		*hyphen = '_';
#else
	/* Older runtimes: go through the LCID, defaulting to US English */
	lcid = loct->locinfo->lc_handle[LC_CTYPE];
	if (lcid == 0)
		lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
	_free_locale(loct);

	if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
		return NULL;
	if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
		return NULL;
	snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
#endif

	return iso_lc_messages;
#else
	return NULL;				/* Not supported on this version of msvc/mingw */
#endif							/* _MSC_VER >= 1400 */
}
#endif							/* WIN32 && LC_MESSAGES */
977 : #endif /* WIN32 && LC_MESSAGES */
978 :
979 :
980 : /*
981 : * Detect aging strxfrm() implementations that, in a subset of locales, write
982 : * past the specified buffer length. Affected users must update OS packages
983 : * before using PostgreSQL 9.5 or later.
984 : *
985 : * Assume that the bug can come and go from one postmaster startup to another
986 : * due to physical replication among diverse machines. Assume that the bug's
987 : * presence will not change during the life of a particular postmaster. Given
988 : * those assumptions, call this no less than once per postmaster startup per
989 : * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
990 : * there is no need to consider pg_collation locales.
991 : */
992 : void
993 341 : check_strxfrm_bug(void)
994 : {
995 : char buf[32];
996 341 : const int canary = 0x7F;
997 341 : bool ok = true;
998 :
999 : /*
1000 : * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1001 : * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1002 : * below that range.
1003 : *
1004 : * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1005 : * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1006 : * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1007 : * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1008 : */
1009 341 : buf[7] = canary;
1010 341 : (void) strxfrm(buf, "ab", 7);
1011 341 : if (buf[7] != canary)
1012 0 : ok = false;
1013 :
1014 : /*
1015 : * illumos bug #1594 was present in the source tree from 2010-10-11 to
1016 : * 2012-02-01. Given an ASCII string of any length and length limit 1,
1017 : * affected systems ignore the length limit and modify a number of bytes
1018 : * one less than the return value. The problem inputs for this bug do not
1019 : * overlap those for the Solaris bug, hence a distinct test.
1020 : *
1021 : * Affected systems include smartos-20110926T021612Z. Affected locales
1022 : * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1023 : */
1024 341 : buf[1] = canary;
1025 341 : (void) strxfrm(buf, "a", 1);
1026 341 : if (buf[1] != canary)
1027 0 : ok = false;
1028 :
1029 341 : if (!ok)
1030 0 : ereport(ERROR,
1031 : (errcode(ERRCODE_SYSTEM_ERROR),
1032 : errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1033 : setlocale(LC_COLLATE, NULL)),
1034 : errhint("Apply system library package updates.")));
1035 341 : }
1036 :
1037 :
1038 : /*
1039 : * Cache mechanism for collation information.
1040 : *
1041 : * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1042 : * (or POSIX), so we can optimize a few code paths in various places.
1043 : * For the built-in C and POSIX collations, we can know that without even
1044 : * doing a cache lookup, but we want to support aliases for C/POSIX too.
1045 : * For the "default" collation, there are separate static cache variables,
1046 : * since consulting the pg_collation catalog doesn't tell us what we need.
1047 : *
1048 : * Also, if a pg_locale_t has been requested for a collation, we cache that
1049 : * for the life of a backend.
1050 : *
1051 : * Note that some code relies on the flags not reporting false negatives
1052 : * (that is, saying it's not C when it is). For example, char2wchar()
1053 : * could fail if the locale is C, so str_tolower() shouldn't call it
1054 : * in that case.
1055 : *
1056 : * Note that we currently lack any way to flush the cache. Since we don't
1057 : * support ALTER COLLATION, this is OK. The worst case is that someone
1058 : * drops a collation, and a useless cache entry hangs around in existing
1059 : * backends.
1060 : */
1061 :
1062 : static collation_cache_entry *
1063 4 : lookup_collation_cache(Oid collation, bool set_flags)
1064 : {
1065 : collation_cache_entry *cache_entry;
1066 : bool found;
1067 :
1068 4 : Assert(OidIsValid(collation));
1069 4 : Assert(collation != DEFAULT_COLLATION_OID);
1070 :
1071 4 : if (collation_cache == NULL)
1072 : {
1073 : /* First time through, initialize the hash table */
1074 : HASHCTL ctl;
1075 :
1076 1 : memset(&ctl, 0, sizeof(ctl));
1077 1 : ctl.keysize = sizeof(Oid);
1078 1 : ctl.entrysize = sizeof(collation_cache_entry);
1079 1 : collation_cache = hash_create("Collation cache", 100, &ctl,
1080 : HASH_ELEM | HASH_BLOBS);
1081 : }
1082 :
1083 4 : cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1084 4 : if (!found)
1085 : {
1086 : /*
1087 : * Make sure cache entry is marked invalid, in case we fail before
1088 : * setting things.
1089 : */
1090 2 : cache_entry->flags_valid = false;
1091 2 : cache_entry->locale = 0;
1092 : }
1093 :
1094 4 : if (set_flags && !cache_entry->flags_valid)
1095 : {
1096 : /* Attempt to set the flags */
1097 : HeapTuple tp;
1098 : Form_pg_collation collform;
1099 : const char *collcollate;
1100 : const char *collctype;
1101 :
1102 2 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1103 2 : if (!HeapTupleIsValid(tp))
1104 0 : elog(ERROR, "cache lookup failed for collation %u", collation);
1105 2 : collform = (Form_pg_collation) GETSTRUCT(tp);
1106 :
1107 2 : collcollate = NameStr(collform->collcollate);
1108 2 : collctype = NameStr(collform->collctype);
1109 :
1110 3 : cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1111 1 : (strcmp(collcollate, "POSIX") == 0));
1112 3 : cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1113 1 : (strcmp(collctype, "POSIX") == 0));
1114 :
1115 2 : cache_entry->flags_valid = true;
1116 :
1117 2 : ReleaseSysCache(tp);
1118 : }
1119 :
1120 4 : return cache_entry;
1121 : }
1122 :
1123 :
1124 : /*
1125 : * Detect whether collation's LC_COLLATE property is C
1126 : */
1127 : bool
1128 410479 : lc_collate_is_c(Oid collation)
1129 : {
1130 : /*
1131 : * If we're asked about "collation 0", return false, so that the code will
1132 : * go into the non-C path and report that the collation is bogus.
1133 : */
1134 410479 : if (!OidIsValid(collation))
1135 2 : return false;
1136 :
1137 : /*
1138 : * If we're asked about the default collation, we have to inquire of the C
1139 : * library. Cache the result so we only have to compute it once.
1140 : */
1141 410477 : if (collation == DEFAULT_COLLATION_OID)
1142 : {
1143 : static int result = -1;
1144 : char *localeptr;
1145 :
1146 288533 : if (result >= 0)
1147 288449 : return (bool) result;
1148 84 : localeptr = setlocale(LC_COLLATE, NULL);
1149 84 : if (!localeptr)
1150 0 : elog(ERROR, "invalid LC_COLLATE setting");
1151 :
1152 84 : if (strcmp(localeptr, "C") == 0)
1153 0 : result = true;
1154 84 : else if (strcmp(localeptr, "POSIX") == 0)
1155 0 : result = true;
1156 : else
1157 84 : result = false;
1158 84 : return (bool) result;
1159 : }
1160 :
1161 : /*
1162 : * If we're asked about the built-in C/POSIX collations, we know that.
1163 : */
1164 121944 : if (collation == C_COLLATION_OID ||
1165 : collation == POSIX_COLLATION_OID)
1166 121942 : return true;
1167 :
1168 : /*
1169 : * Otherwise, we have to consult pg_collation, but we cache that.
1170 : */
1171 2 : return (lookup_collation_cache(collation, true))->collate_is_c;
1172 : }
1173 :
1174 : /*
1175 : * Detect whether collation's LC_CTYPE property is C
1176 : */
1177 : bool
1178 37652 : lc_ctype_is_c(Oid collation)
1179 : {
1180 : /*
1181 : * If we're asked about "collation 0", return false, so that the code will
1182 : * go into the non-C path and report that the collation is bogus.
1183 : */
1184 37652 : if (!OidIsValid(collation))
1185 0 : return false;
1186 :
1187 : /*
1188 : * If we're asked about the default collation, we have to inquire of the C
1189 : * library. Cache the result so we only have to compute it once.
1190 : */
1191 37652 : if (collation == DEFAULT_COLLATION_OID)
1192 : {
1193 : static int result = -1;
1194 : char *localeptr;
1195 :
1196 37614 : if (result >= 0)
1197 37557 : return (bool) result;
1198 57 : localeptr = setlocale(LC_CTYPE, NULL);
1199 57 : if (!localeptr)
1200 0 : elog(ERROR, "invalid LC_CTYPE setting");
1201 :
1202 57 : if (strcmp(localeptr, "C") == 0)
1203 0 : result = true;
1204 57 : else if (strcmp(localeptr, "POSIX") == 0)
1205 0 : result = true;
1206 : else
1207 57 : result = false;
1208 57 : return (bool) result;
1209 : }
1210 :
1211 : /*
1212 : * If we're asked about the built-in C/POSIX collations, we know that.
1213 : */
1214 38 : if (collation == C_COLLATION_OID ||
1215 : collation == POSIX_COLLATION_OID)
1216 36 : return true;
1217 :
1218 : /*
1219 : * Otherwise, we have to consult pg_collation, but we cache that.
1220 : */
1221 2 : return (lookup_collation_cache(collation, true))->ctype_is_c;
1222 : }
1223 :
1224 :
/* helper that raises the error for a failed newlocale()/_create_locale() */
#ifdef HAVE_LOCALE_T
static void
report_newlocale_failure(const char *localename)
{
	bool		locale_missing;

	/*
	 * _create_locale() on Windows gives no usable error indication, and
	 * BSD-derived platforms do not appear to set errno either (although POSIX
	 * is fairly clear that newlocale should).  When errno was left at zero,
	 * report ENOENT.
	 */
	if (errno == 0)
		errno = ENOENT;

	/*
	 * Here ENOENT means "no such locale" rather than "no such file", so an
	 * errdetail is attached to say so.  The decision is taken now, because
	 * the auxiliary functions called while building the report might change
	 * errno.
	 */
	locale_missing = (errno == ENOENT);

	ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("could not create locale \"%s\": %m",
					localename),
			 (locale_missing ?
			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
						localename) : 0)));
}
#endif							/* HAVE_LOCALE_T */
1256 :
1257 :
1258 : /*
1259 : * Create a locale_t from a collation OID. Results are cached for the
1260 : * lifetime of the backend. Thus, do not free the result with freelocale().
1261 : *
1262 : * As a special optimization, the default/database collation returns 0.
1263 : * Callers should then revert to the non-locale_t-enabled code path.
1264 : * In fact, they shouldn't call this function at all when they are dealing
1265 : * with the default locale. That can save quite a bit in hotspots.
1266 : * Also, callers should avoid calling this before going down a C/POSIX
1267 : * fastpath, because such a fastpath should work even on platforms without
1268 : * locale_t support in the C library.
1269 : *
1270 : * For simplicity, we always generate COLLATE + CTYPE even though we
1271 : * might only need one of them. Since this is called only once per session,
1272 : * it shouldn't cost much.
1273 : */
1274 : pg_locale_t
1275 0 : pg_newlocale_from_collation(Oid collid)
1276 : {
1277 : collation_cache_entry *cache_entry;
1278 :
1279 : /* Callers must pass a valid OID */
1280 0 : Assert(OidIsValid(collid));
1281 :
1282 : /* Return 0 for "default" collation, just in case caller forgets */
1283 0 : if (collid == DEFAULT_COLLATION_OID)
1284 0 : return (pg_locale_t) 0;
1285 :
1286 0 : cache_entry = lookup_collation_cache(collid, false);
1287 :
1288 0 : if (cache_entry->locale == 0)
1289 : {
1290 : /* We haven't computed this yet in this session, so do it */
1291 : HeapTuple tp;
1292 : Form_pg_collation collform;
1293 : const char *collcollate;
1294 : const char *collctype pg_attribute_unused();
1295 : pg_locale_t result;
1296 : Datum collversion;
1297 : bool isnull;
1298 :
1299 0 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1300 0 : if (!HeapTupleIsValid(tp))
1301 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
1302 0 : collform = (Form_pg_collation) GETSTRUCT(tp);
1303 :
1304 0 : collcollate = NameStr(collform->collcollate);
1305 0 : collctype = NameStr(collform->collctype);
1306 :
1307 0 : result = malloc(sizeof(*result));
1308 0 : memset(result, 0, sizeof(*result));
1309 0 : result->provider = collform->collprovider;
1310 :
1311 0 : if (collform->collprovider == COLLPROVIDER_LIBC)
1312 : {
1313 : #ifdef HAVE_LOCALE_T
1314 : locale_t loc;
1315 :
1316 0 : if (strcmp(collcollate, collctype) == 0)
1317 : {
1318 : /* Normal case where they're the same */
1319 0 : errno = 0;
1320 : #ifndef WIN32
1321 0 : loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1322 : NULL);
1323 : #else
1324 : loc = _create_locale(LC_ALL, collcollate);
1325 : #endif
1326 0 : if (!loc)
1327 0 : report_newlocale_failure(collcollate);
1328 : }
1329 : else
1330 : {
1331 : #ifndef WIN32
1332 : /* We need two newlocale() steps */
1333 : locale_t loc1;
1334 :
1335 0 : errno = 0;
1336 0 : loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1337 0 : if (!loc1)
1338 0 : report_newlocale_failure(collcollate);
1339 0 : errno = 0;
1340 0 : loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1341 0 : if (!loc)
1342 0 : report_newlocale_failure(collctype);
1343 : #else
1344 :
1345 : /*
1346 : * XXX The _create_locale() API doesn't appear to support
1347 : * this. Could perhaps be worked around by changing
1348 : * pg_locale_t to contain two separate fields.
1349 : */
1350 : ereport(ERROR,
1351 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1352 : errmsg("collations with different collate and ctype values are not supported on this platform")));
1353 : #endif
1354 : }
1355 :
1356 0 : result->info.lt = loc;
1357 : #else /* not HAVE_LOCALE_T */
1358 : /* platform that doesn't support locale_t */
1359 : ereport(ERROR,
1360 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1361 : errmsg("collation provider LIBC is not supported on this platform")));
1362 : #endif /* not HAVE_LOCALE_T */
1363 : }
1364 0 : else if (collform->collprovider == COLLPROVIDER_ICU)
1365 : {
1366 : #ifdef USE_ICU
1367 : UCollator *collator;
1368 : UErrorCode status;
1369 :
1370 : if (strcmp(collcollate, collctype) != 0)
1371 : ereport(ERROR,
1372 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1373 : errmsg("collations with different collate and ctype values are not supported by ICU")));
1374 :
1375 : status = U_ZERO_ERROR;
1376 : collator = ucol_open(collcollate, &status);
1377 : if (U_FAILURE(status))
1378 : ereport(ERROR,
1379 : (errmsg("could not open collator for locale \"%s\": %s",
1380 : collcollate, u_errorName(status))));
1381 :
1382 : result->info.icu.locale = strdup(collcollate);
1383 : result->info.icu.ucol = collator;
1384 : #else /* not USE_ICU */
1385 : /* could get here if a collation was created by a build with ICU */
1386 0 : ereport(ERROR,
1387 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1388 : errmsg("ICU is not supported in this build"), \
1389 : errhint("You need to rebuild PostgreSQL using --with-icu.")));
1390 : #endif /* not USE_ICU */
1391 : }
1392 :
1393 0 : collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1394 : &isnull);
1395 0 : if (!isnull)
1396 : {
1397 : char *actual_versionstr;
1398 : char *collversionstr;
1399 :
1400 0 : actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1401 0 : if (!actual_versionstr)
1402 : {
1403 : /*
1404 : * This could happen when specifying a version in CREATE
1405 : * COLLATION for a libc locale, or manually creating a mess in
1406 : * the catalogs.
1407 : */
1408 0 : ereport(ERROR,
1409 : (errmsg("collation \"%s\" has no actual version, but a version was specified",
1410 : NameStr(collform->collname))));
1411 : }
1412 0 : collversionstr = TextDatumGetCString(collversion);
1413 :
1414 0 : if (strcmp(actual_versionstr, collversionstr) != 0)
1415 0 : ereport(WARNING,
1416 : (errmsg("collation \"%s\" has version mismatch",
1417 : NameStr(collform->collname)),
1418 : errdetail("The collation in the database was created using version %s, "
1419 : "but the operating system provides version %s.",
1420 : collversionstr, actual_versionstr),
1421 : errhint("Rebuild all objects affected by this collation and run "
1422 : "ALTER COLLATION %s REFRESH VERSION, "
1423 : "or build PostgreSQL with the right library version.",
1424 : quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1425 : NameStr(collform->collname)))));
1426 : }
1427 :
1428 0 : ReleaseSysCache(tp);
1429 :
1430 0 : cache_entry->locale = result;
1431 : }
1432 :
1433 0 : return cache_entry->locale;
1434 : }
1435 :
/*
 * Ask the operating system/library for the provider-specific version string
 * of the given collation.
 *
 * For any one provider the answer must be consistent: either always a
 * non-NULL string, or always NULL (meaning the provider has no notion of
 * versions).  It must not be NULL for some collcollate values and non-NULL
 * for others.
 *
 * Only the ICU provider, in builds with USE_ICU, yields a version here; the
 * string is then a palloc'd copy.  In every other case the result is NULL.
 */
char *
get_collation_actual_version(char collprovider, const char *collcollate)
{
	char	   *version = NULL;

#ifdef USE_ICU
	if (collprovider == COLLPROVIDER_ICU)
	{
		UCollator  *coll;
		UErrorCode	err = U_ZERO_ERROR;
		UVersionInfo vinfo;
		char		vstring[U_MAX_VERSION_STRING_LENGTH];

		/* Open a collator only long enough to read its version */
		coll = ucol_open(collcollate, &err);
		if (U_FAILURE(err))
			ereport(ERROR,
					(errmsg("could not open collator for locale \"%s\": %s",
							collcollate, u_errorName(err))));
		ucol_getVersion(coll, vinfo);
		ucol_close(coll);

		u_versionToString(vinfo, vstring);
		version = pstrdup(vstring);
	}
#endif

	return version;
}
1475 :
1476 :
1477 : #ifdef USE_ICU
1478 : /*
1479 : * Converter object for converting between ICU's UChar strings and C strings
1480 : * in database encoding. Since the database encoding doesn't change, we only
1481 : * need one of these per session.
1482 : */
1483 : static UConverter *icu_converter = NULL;
1484 :
1485 : static void
1486 : init_icu_converter(void)
1487 : {
1488 : const char *icu_encoding_name;
1489 : UErrorCode status;
1490 : UConverter *conv;
1491 :
1492 : if (icu_converter)
1493 : return;
1494 :
1495 : icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1496 :
1497 : status = U_ZERO_ERROR;
1498 : conv = ucnv_open(icu_encoding_name, &status);
1499 : if (U_FAILURE(status))
1500 : ereport(ERROR,
1501 : (errmsg("could not open ICU converter for encoding \"%s\": %s",
1502 : icu_encoding_name, u_errorName(status))));
1503 :
1504 : icu_converter = conv;
1505 : }
1506 :
1507 : /*
1508 : * Convert a string in the database encoding into a string of UChars.
1509 : *
1510 : * The source string at buff is of length nbytes
1511 : * (it needn't be nul-terminated)
1512 : *
1513 : * *buff_uchar receives a pointer to the palloc'd result string, and
1514 : * the function's result is the number of UChars generated.
1515 : *
1516 : * The result string is nul-terminated, though most callers rely on the
1517 : * result length instead.
1518 : */
1519 : int32_t
1520 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1521 : {
1522 : UErrorCode status;
1523 : int32_t len_uchar;
1524 :
1525 : init_icu_converter();
1526 :
1527 : status = U_ZERO_ERROR;
1528 : len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1529 : buff, nbytes, &status);
1530 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1531 : ereport(ERROR,
1532 : (errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1533 :
1534 : *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1535 :
1536 : status = U_ZERO_ERROR;
1537 : len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1538 : buff, nbytes, &status);
1539 : if (U_FAILURE(status))
1540 : ereport(ERROR,
1541 : (errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1542 :
1543 : return len_uchar;
1544 : }
1545 :
1546 : /*
1547 : * Convert a string of UChars into the database encoding.
1548 : *
1549 : * The source string at buff_uchar is of length len_uchar
1550 : * (it needn't be nul-terminated)
1551 : *
1552 : * *result receives a pointer to the palloc'd result string, and the
1553 : * function's result is the number of bytes generated (not counting nul).
1554 : *
1555 : * The result string is nul-terminated.
1556 : */
1557 : int32_t
1558 : icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1559 : {
1560 : UErrorCode status;
1561 : int32_t len_result;
1562 :
1563 : init_icu_converter();
1564 :
1565 : status = U_ZERO_ERROR;
1566 : len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1567 : buff_uchar, len_uchar, &status);
1568 : if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1569 : ereport(ERROR,
1570 : (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1571 :
1572 : *result = palloc(len_result + 1);
1573 :
1574 : status = U_ZERO_ERROR;
1575 : len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1576 : buff_uchar, len_uchar, &status);
1577 : if (U_FAILURE(status))
1578 : ereport(ERROR,
1579 : (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1580 :
1581 : return len_result;
1582 : }
1583 : #endif /* USE_ICU */
1584 :
1585 : /*
1586 : * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1587 : * Therefore we keep them here rather than with the mbutils code.
1588 : */
1589 :
1590 : #ifdef USE_WIDE_UPPER_LOWER
1591 :
1592 : /*
1593 : * wchar2char --- convert wide characters to multibyte format
1594 : *
1595 : * This has the same API as the standard wcstombs_l() function; in particular,
1596 : * tolen is the maximum number of bytes to store at *to, and *from must be
1597 : * zero-terminated. The output will be zero-terminated iff there is room.
1598 : */
1599 : size_t
1600 3339 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1601 : {
1602 : size_t result;
1603 :
1604 3339 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1605 :
1606 3339 : if (tolen == 0)
1607 0 : return 0;
1608 :
1609 : #ifdef WIN32
1610 :
1611 : /*
1612 : * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1613 : * for some reason mbstowcs and wcstombs won't do this for us, so we use
1614 : * MultiByteToWideChar().
1615 : */
1616 : if (GetDatabaseEncoding() == PG_UTF8)
1617 : {
1618 : result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1619 : NULL, NULL);
1620 : /* A zero return is failure */
1621 : if (result <= 0)
1622 : result = -1;
1623 : else
1624 : {
1625 : Assert(result <= tolen);
1626 : /* Microsoft counts the zero terminator in the result */
1627 : result--;
1628 : }
1629 : }
1630 : else
1631 : #endif /* WIN32 */
1632 3339 : if (locale == (pg_locale_t) 0)
1633 : {
1634 : /* Use wcstombs directly for the default locale */
1635 3339 : result = wcstombs(to, from, tolen);
1636 : }
1637 : else
1638 : {
1639 : #ifdef HAVE_LOCALE_T
1640 : #ifdef HAVE_WCSTOMBS_L
1641 : /* Use wcstombs_l for nondefault locales */
1642 : result = wcstombs_l(to, from, tolen, locale->info.lt);
1643 : #else /* !HAVE_WCSTOMBS_L */
1644 : /* We have to temporarily set the locale as current ... ugh */
1645 0 : locale_t save_locale = uselocale(locale->info.lt);
1646 :
1647 0 : result = wcstombs(to, from, tolen);
1648 :
1649 0 : uselocale(save_locale);
1650 : #endif /* HAVE_WCSTOMBS_L */
1651 : #else /* !HAVE_LOCALE_T */
1652 : /* Can't have locale != 0 without HAVE_LOCALE_T */
1653 : elog(ERROR, "wcstombs_l is not available");
1654 : result = 0; /* keep compiler quiet */
1655 : #endif /* HAVE_LOCALE_T */
1656 : }
1657 :
1658 3339 : return result;
1659 : }
1660 :
1661 : /*
1662 : * char2wchar --- convert multibyte characters to wide characters
1663 : *
1664 : * This has almost the API of mbstowcs_l(), except that *from need not be
1665 : * null-terminated; instead, the number of input bytes is specified as
1666 : * fromlen. Also, we ereport() rather than returning -1 for invalid
1667 : * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1668 : * The output will be zero-terminated iff there is room.
1669 : */
1670 : size_t
1671 3793 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1672 : pg_locale_t locale)
1673 : {
1674 : size_t result;
1675 :
1676 3793 : Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1677 :
1678 3793 : if (tolen == 0)
1679 0 : return 0;
1680 :
1681 : #ifdef WIN32
1682 : /* See WIN32 "Unicode" comment above */
1683 : if (GetDatabaseEncoding() == PG_UTF8)
1684 : {
1685 : /* Win32 API does not work for zero-length input */
1686 : if (fromlen == 0)
1687 : result = 0;
1688 : else
1689 : {
1690 : result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1691 : /* A zero return is failure */
1692 : if (result == 0)
1693 : result = -1;
1694 : }
1695 :
1696 : if (result != -1)
1697 : {
1698 : Assert(result < tolen);
1699 : /* Append trailing null wchar (MultiByteToWideChar() does not) */
1700 : to[result] = 0;
1701 : }
1702 : }
1703 : else
1704 : #endif /* WIN32 */
1705 : {
1706 : /* mbstowcs requires ending '\0' */
1707 3793 : char *str = pnstrdup(from, fromlen);
1708 :
1709 3793 : if (locale == (pg_locale_t) 0)
1710 : {
1711 : /* Use mbstowcs directly for the default locale */
1712 3793 : result = mbstowcs(to, str, tolen);
1713 : }
1714 : else
1715 : {
1716 : #ifdef HAVE_LOCALE_T
1717 : #ifdef HAVE_MBSTOWCS_L
1718 : /* Use mbstowcs_l for nondefault locales */
1719 : result = mbstowcs_l(to, str, tolen, locale->info.lt);
1720 : #else /* !HAVE_MBSTOWCS_L */
1721 : /* We have to temporarily set the locale as current ... ugh */
1722 0 : locale_t save_locale = uselocale(locale->info.lt);
1723 :
1724 0 : result = mbstowcs(to, str, tolen);
1725 :
1726 0 : uselocale(save_locale);
1727 : #endif /* HAVE_MBSTOWCS_L */
1728 : #else /* !HAVE_LOCALE_T */
1729 : /* Can't have locale != 0 without HAVE_LOCALE_T */
1730 : elog(ERROR, "mbstowcs_l is not available");
1731 : result = 0; /* keep compiler quiet */
1732 : #endif /* HAVE_LOCALE_T */
1733 : }
1734 :
1735 3793 : pfree(str);
1736 : }
1737 :
1738 3793 : if (result == -1)
1739 : {
1740 : /*
1741 : * Invalid multibyte character encountered. We try to give a useful
1742 : * error message by letting pg_verifymbstr check the string. But it's
1743 : * possible that the string is OK to us, and not OK to mbstowcs ---
1744 : * this suggests that the LC_CTYPE locale is different from the
1745 : * database encoding. Give a generic error message if verifymbstr
1746 : * can't find anything wrong.
1747 : */
1748 0 : pg_verifymbstr(from, fromlen, false); /* might not return */
1749 : /* but if it does ... */
1750 0 : ereport(ERROR,
1751 : (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1752 : errmsg("invalid multibyte character for locale"),
1753 : errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1754 : }
1755 :
1756 3793 : return result;
1757 : }
1758 :
1759 : #endif /* USE_WIDE_UPPER_LOWER */
|