1 /* Implementation of the internal dcigettext function.
2 Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc.
4 This file is part of the GNU C Library. Its master source is NOT part of
5 the C library, however.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
17 You should have received a copy of the GNU Library General Public
18 License along with the GNU C Library; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
26 #include <sys/types.h>
28 #if defined __GNUC__ && !defined C_ALLOCA
29 # define alloca __builtin_alloca
30 # define HAVE_ALLOCA 1
32 # if (defined HAVE_ALLOCA_H || defined _LIBC) && !defined C_ALLOCA
50 # define __set_errno(val) errno = (val)
53 #if defined STDC_HEADERS || defined _LIBC
64 #if defined HAVE_STRING_H || defined _LIBC
66 # define _GNU_SOURCE 1
72 #if !HAVE_STRCHR && !defined _LIBC
78 #if defined HAVE_UNISTD_H || defined _LIBC
82 #if defined HAVE_LOCALE_H || defined _LIBC
86 #if defined HAVE_SYS_PARAM_H || defined _LIBC
87 # include <sys/param.h>
95 # include "libgettext.h"
97 #include "hash-string.h"
99 /* Thread safety. */
101 # include <bits/libc-lock.h>
104 /* @@ end of prolog @@ */
107 /* Rename the non ANSI C functions. This is required by the standard
108 because some ANSI C functions will require linking with this object
109 file and the name space must not be polluted. */
110 # define getcwd __getcwd
112 # define stpcpy __stpcpy
115 # if !defined HAVE_GETCWD
117 # define getcwd(buf, max) getwd (buf)
122 static char *stpcpy
PARAMS ((char *dest
, const char *src
));
124 # ifndef HAVE_MEMPCPY
125 static void *mempcpy
PARAMS ((void *dest
, const void *src
, size_t n
));
129 /* Amount to increase buffer size by in each try. */
132 /* The following is from pathmax.h. */
133 /* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define
134 PATH_MAX but might cause redefinition warnings when sys/param.h is
135 later included (as on MORE/BSD 4.3). */
136 #if defined _POSIX_VERSION || (defined HAVE_LIMITS_H && !defined __GNUC__)
140 #ifndef _POSIX_PATH_MAX
141 # define _POSIX_PATH_MAX 255
144 #if !defined PATH_MAX && defined _PC_PATH_MAX
145 # define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : pathconf ("/", _PC_PATH_MAX))
148 /* Don't include sys/param.h if it already has been. */
149 #if defined HAVE_SYS_PARAM_H && !defined PATH_MAX && !defined MAXPATHLEN
150 # include <sys/param.h>
153 #if !defined PATH_MAX && defined MAXPATHLEN
154 # define PATH_MAX MAXPATHLEN
158 # define PATH_MAX _POSIX_PATH_MAX
161 /* XPG3 defines the result of `setlocale (category, NULL)' as:
162 ``Directs `setlocale()' to query `category' and return the current
163 setting of `locale'.''
164 However it does not specify the exact format. And even worse: POSIX
165 defines this not at all. So we can use this feature only on selected
166 systems (e.g. those using the GNU C Library). */
168 # define HAVE_LOCALE_NULL
171 /* We want to allocate a string at the end of the struct. gcc makes
179 /* This is the type used for the search tree where known translations
181 struct known_translation_t
183 /* Domain in which to search. */
187 unsigned long int plindex
;
192 /* State of the catalog counter at the point the string was found. */
195 /* And finally the translation. */
196 const char *translation
;
198 /* Pointer to the string in question. */
202 /* Root of the search tree with known translations. We can use this
203 only if the system provides the `tsearch' function family. */
204 #if defined HAVE_TSEARCH || defined _LIBC
210 # define tsearch __tsearch
213 /* Function to compare two entries in the table of known translations. */
215 transcmp (const void *p1
, const void *p2
)
217 struct known_translation_t
*s1
= (struct known_translation_t
*) p1
;
218 struct known_translation_t
*s2
= (struct known_translation_t
*) p2
;
221 result
= strcmp (s1
->msgid
, s2
->msgid
);
224 result
= strcmp (s1
->msgid
, s2
->msgid
);
227 result
= s1
->plindex
- s2
->plindex
;
229 /* We compare the category last (though this is the cheapest
230 operation) since it is hopefully always the same (namely
232 result
= s1
->category
- s2
->category
;
240 /* Name of the default domain used for gettext(3) prior to any call to
241 textdomain(3). The default value for this is "messages". */
242 const char _nl_default_default_domain
[] = "messages";
244 /* Value used as the default domain for gettext(3). */
245 const char *_nl_current_default_domain
= _nl_default_default_domain
;
247 /* Contains the default location of the message catalogs. */
248 const char _nl_default_dirname
[] = GNULOCALEDIR
;
250 /* List with bindings of specific domains created by bindtextdomain()
252 struct binding
*_nl_domain_bindings
;
254 /* Prototypes for local functions. */
255 static unsigned long int plural_eval (struct expression
*pexp
,
256 unsigned long int n
) internal_function
;
257 static const char *category_to_name
PARAMS ((int category
)) internal_function
;
258 static const char *guess_category_value
PARAMS ((int category
,
259 const char *categoryname
))
263 /* For those losing systems which don't have `alloca' we have to add
264 some additional code emulating it. */
266 /* Nothing has to be done. */
267 # define ADD_BLOCK(list, address) /* nothing */
268 # define FREE_BLOCKS(list) /* nothing */
273 struct block_list
*next
;
275 # define ADD_BLOCK(list, addr) \
277 struct block_list *newp = (struct block_list *) malloc (sizeof (*newp)); \
278 /* If we cannot get a free block we cannot add the new element to \
280 if (newp != NULL) { \
281 newp->address = (addr); \
282 newp->next = (list); \
286 # define FREE_BLOCKS(list) \
288 while (list != NULL) { \
289 struct block_list *old = list; \
295 # define alloca(size) (malloc (size))
296 #endif /* have alloca */
299 /* Names for the libintl functions are a problem. They must not clash
300 with existing names and they should follow ANSI C. But this source
301 code is also used in GNU C Library where the names have a __
302 prefix. So we have to make a difference here. */
304 # define DCIGETTEXT __dcigettext
306 # define DCIGETTEXT dcigettext__
309 /* Checking whether the binary runs SUID must be done, and glibc provides
310 easier methods for this, therefore we make a distinction here. */
312 # define ENABLE_SECURE __libc_enable_secure
313 # define DETERMINE_SECURE
315 static int enable_secure
;
316 # define ENABLE_SECURE (enable_secure == 1)
317 # define DETERMINE_SECURE \
318 if (enable_secure == 0) \
320 if (getuid () != geteuid () || getgid () != getegid ()) \
323 enable_secure = -1; \
327 /* Look up MSGID in the DOMAINNAME message catalog for the current
328 CATEGORY locale and, if PLURAL is nonzero, search over string
329 depending on the plural form determined by N. */
331 DCIGETTEXT (domainname
, msgid1
, msgid2
, plural
, n
, category
)
332 const char *domainname
;
340 struct block_list
*block_list
= NULL
;
342 struct loaded_l10nfile
*domain
;
343 struct binding
*binding
;
344 const char *categoryname
;
345 const char *categoryvalue
;
346 char *dirname
, *xdomainname
;
350 #if defined HAVE_TSEARCH || defined _LIBC
351 struct known_translation_t
*search
;
352 struct known_translation_t
**foundp
= NULL
;
353 size_t msgid_len
= strlen (msgid1
) + 1;
355 size_t domainname_len
;
357 /* If no real MSGID is given return NULL. */
361 #if defined HAVE_TSEARCH || defined _LIBC
364 /* Try to find the translation among those which we found at
366 search
= (struct known_translation_t
*) alloca (sizeof (*search
)
368 memcpy (search
->msgid
, msgid1
, msgid_len
);
369 search
->domain
= (char *) domainname
;
371 search
->category
= category
;
373 foundp
= (struct known_translation_t
**) tfind (search
, &root
, transcmp
);
374 if (foundp
!= NULL
&& (*foundp
)->counter
== _nl_msg_cat_cntr
)
375 return (char *) (*foundp
)->translation
;
379 /* Preserve the `errno' value. */
382 /* See whether this is a SUID binary or not. */
385 /* If DOMAINNAME is NULL, we are interested in the default domain. If
386 CATEGORY is not LC_MESSAGES this might not make much sense but the
387 definition left this undefined. */
388 if (domainname
== NULL
)
389 domainname
= _nl_current_default_domain
;
391 /* First find matching binding. */
392 for (binding
= _nl_domain_bindings
; binding
!= NULL
; binding
= binding
->next
)
394 int compare
= strcmp (domainname
, binding
->domainname
);
400 /* It is not in the list. */
407 dirname
= (char *) _nl_default_dirname
;
408 else if (binding
->dirname
[0] == '/')
409 dirname
= binding
->dirname
;
412 /* We have a relative path. Make it absolute now. */
413 size_t dirname_len
= strlen (binding
->dirname
) + 1;
417 path_max
= (unsigned int) PATH_MAX
;
418 path_max
+= 2; /* The getcwd docs say to do this. */
420 dirname
= (char *) alloca (path_max
+ dirname_len
);
421 ADD_BLOCK (block_list
, dirname
);
424 while ((ret
= getcwd (dirname
, path_max
)) == NULL
&& errno
== ERANGE
)
426 path_max
+= PATH_INCR
;
427 dirname
= (char *) alloca (path_max
+ dirname_len
);
428 ADD_BLOCK (block_list
, dirname
);
434 /* We cannot get the current working directory. Don't signal an
435 error but simply return the default string. */
436 FREE_BLOCKS (block_list
);
437 __set_errno (saved_errno
);
440 /* Use the Germanic plural rule. */
441 : n
== 1 ? (char *) msgid1
: (char *) msgid2
);
444 stpcpy (stpcpy (strchr (dirname
, '\0'), "/"), binding
->dirname
);
447 /* Now determine the symbolic name of CATEGORY and its value. */
448 categoryname
= category_to_name (category
);
449 categoryvalue
= guess_category_value (category
, categoryname
);
451 domainname_len
= strlen (domainname
);
452 xdomainname
= (char *) alloca (strlen (categoryname
)
453 + domainname_len
+ 5);
454 ADD_BLOCK (block_list
, xdomainname
);
456 stpcpy (mempcpy (stpcpy (stpcpy (xdomainname
, categoryname
), "/"),
457 domainname
, domainname_len
),
460 /* Creating working area. */
461 single_locale
= (char *) alloca (strlen (categoryvalue
) + 1);
462 ADD_BLOCK (block_list
, single_locale
);
465 /* Search for the given string. This is a loop because we perhaps
466 got an ordered list of languages to consider for the translation. */
469 /* Make CATEGORYVALUE point to the next element of the list. */
470 while (categoryvalue
[0] != '\0' && categoryvalue
[0] == ':')
472 if (categoryvalue
[0] == '\0')
474 /* The whole contents of CATEGORYVALUE has been searched but
475 no valid entry has been found. We solve this situation
476 by implicitly appending a "C" entry, i.e. no translation
478 single_locale
[0] = 'C';
479 single_locale
[1] = '\0';
483 char *cp
= single_locale
;
484 while (categoryvalue
[0] != '\0' && categoryvalue
[0] != ':')
485 *cp
++ = *categoryvalue
++;
488 /* When this is a SUID binary we must not allow accessing files
489 outside the dedicated directories. */
491 && (memchr (single_locale
, '/',
492 _nl_find_language (single_locale
) - single_locale
)
494 /* Ignore this entry. */
498 /* If the current locale value is C (or POSIX) we don't load a
499 domain. Return the MSGID. */
500 if (strcmp (single_locale
, "C") == 0
501 || strcmp (single_locale
, "POSIX") == 0)
503 FREE_BLOCKS (block_list
);
504 __set_errno (saved_errno
);
507 /* Use the Germanic plural rule. */
508 : n
== 1 ? (char *) msgid1
: (char *) msgid2
);
512 /* Find structure describing the message catalog matching the
513 DOMAINNAME and CATEGORY. */
514 domain
= _nl_find_domain (dirname
, single_locale
, xdomainname
);
518 #if defined HAVE_TSEARCH || defined _LIBC
519 struct loaded_domain
*domaindata
=
520 (struct loaded_domain
*) domain
->data
;
521 unsigned long int index
= 0;
525 /* Try to find the translation among those which we
526 found at some time. */
527 search
= (struct known_translation_t
*) alloca (sizeof (*search
)
529 memcpy (search
->msgid
, msgid1
, msgid_len
);
530 search
->domain
= (char *) domainname
;
531 search
->plindex
= plural_eval (domaindata
->plural
, n
);
532 if (search
->plindex
>= domaindata
->nplurals
)
533 /* This should never happen. It means the plural expression
534 and the given maximum value do not match. */
536 index
= search
->plindex
;
537 search
->category
= category
;
539 foundp
= (struct known_translation_t
**) tfind (search
, &root
,
541 if (foundp
!= NULL
&& (*foundp
)->counter
== _nl_msg_cat_cntr
)
542 return (char *) (*foundp
)->translation
;
546 retval
= _nl_find_msg (domain
, msgid1
, index
);
552 for (cnt
= 0; domain
->successor
[cnt
] != NULL
; ++cnt
)
554 retval
= _nl_find_msg (domain
->successor
[cnt
], msgid1
,
564 FREE_BLOCKS (block_list
);
565 __set_errno (saved_errno
);
566 #if defined HAVE_TSEARCH || defined _LIBC
569 /* Create a new entry and add it to the search tree. */
570 struct known_translation_t
*newp
;
572 newp
= (struct known_translation_t
*)
573 malloc (sizeof (*newp
) + msgid_len
574 + domainname_len
+ 1 - ZERO
);
577 newp
->domain
= mempcpy (newp
->msgid
, msgid1
, msgid_len
);
578 memcpy (newp
->domain
, domainname
, domainname_len
+ 1);
579 newp
->plindex
= index
;
580 newp
->category
= category
;
581 newp
->counter
= _nl_msg_cat_cntr
;
582 newp
->translation
= retval
;
584 /* Insert the entry in the search tree. */
585 foundp
= (struct known_translation_t
**)
586 tsearch (newp
, &root
, transcmp
);
588 /* The insert failed. */
594 /* We can update the existing entry. */
595 (*foundp
)->counter
= _nl_msg_cat_cntr
;
596 (*foundp
)->translation
= retval
;
609 _nl_find_msg (domain_file
, msgid
, index
)
610 struct loaded_l10nfile
*domain_file
;
612 unsigned long int index
;
616 struct loaded_domain
*domain
;
618 if (domain_file
->decided
== 0)
619 _nl_load_domain (domain_file
);
621 if (domain_file
->data
== NULL
)
624 domain
= (struct loaded_domain
*) domain_file
->data
;
626 /* Locate the MSGID and its translation. */
627 if (domain
->hash_size
> 2 && domain
->hash_tab
!= NULL
)
629 /* Use the hashing table. */
630 nls_uint32 len
= strlen (msgid
);
631 nls_uint32 hash_val
= hash_string (msgid
);
632 nls_uint32 idx
= hash_val
% domain
->hash_size
;
633 nls_uint32 incr
= 1 + (hash_val
% (domain
->hash_size
- 2));
634 nls_uint32 nstr
= W (domain
->must_swap
, domain
->hash_tab
[idx
]);
637 /* Hash table entry is empty. */
640 if (W (domain
->must_swap
, domain
->orig_tab
[nstr
- 1].length
) == len
642 domain
->data
+ W (domain
->must_swap
,
643 domain
->orig_tab
[nstr
- 1].offset
)) == 0)
645 /* We found an entry. If we have to convert the string to use
646 a different character set this is the time. */
648 (char *) domain
->data
+ W (domain
->must_swap
,
649 domain
->trans_tab
[nstr
- 1].offset
);
651 /* Now skip some strings. How much depends on the index passed
656 result
= __rawmemchr (result
, '\0');
658 result
= strchr (result
, '\0');
660 /* And skip over the NUL byte. */
666 domain
->conv
!= (__gconv_t
) -1
669 domain
->conv
!= (iconv_t
) -1
674 /* We are supposed to do a conversion. First allocate an
675 appropriate table with the same structure as the hash
676 table in the file where we can put the pointers to the
677 converted strings in. */
678 if (domain
->conv_tab
== NULL
679 && ((domain
->conv_tab
= (char **) calloc (domain
->hash_size
,
682 /* Mark that we didn't succeed allocating a table. */
683 domain
->conv_tab
= (char **) -1;
685 if (domain
->conv_tab
== (char **) -1)
686 /* Nothing we can do, no more memory. */
689 if (domain
->conv_tab
[idx
] == NULL
)
691 /* We haven't used this string so far, so it is not
692 translated yet. Do this now. */
694 /* For glibc we use a bit more efficient memory handling.
695 We always allocate larger blocks which get used over
696 time. This is faster than many small allocations. */
697 __libc_lock_define_initialized (static, lock
)
698 static unsigned char *freemem
;
699 static size_t freemem_size
;
700 /* Note that we include the NUL byte. */
701 size_t resultlen
= strlen (result
) + 1;
702 const unsigned char *inbuf
= result
;
703 unsigned char *outbuf
= freemem
;
707 __libc_lock_lock (lock
);
709 while ((res
= __gconv (domain
->conv
,
710 &inbuf
, inbuf
+ resultlen
,
711 &outbuf
, outbuf
+ freemem_size
,
712 &written
)) == __GCONV_OK
)
714 if (res
!= __GCONV_FULL_OUTPUT
)
717 /* We must resize the buffer. */
718 freemem_size
= MAX (2 * freemem_size
, 4064);
719 freemem
= (char *) malloc (freemem_size
);
727 /* We have now in our buffer a converted string. Put this
729 domain
->conv_tab
[idx
] = freemem
;
730 freemem_size
-= outbuf
- freemem
;
734 __libc_lock_unlock (lock
);
738 result
= domain
->conv_tab
[idx
];
746 if (idx
>= domain
->hash_size
- incr
)
747 idx
-= domain
->hash_size
- incr
;
751 nstr
= W (domain
->must_swap
, domain
->hash_tab
[idx
]);
753 /* Hash table entry is empty. */
756 if (W (domain
->must_swap
, domain
->orig_tab
[nstr
- 1].length
) == len
758 domain
->data
+ W (domain
->must_swap
,
759 domain
->orig_tab
[nstr
- 1].offset
))
761 return ((char *) domain
->data
762 + W (domain
->must_swap
,
763 domain
->trans_tab
[nstr
- 1].offset
));
768 /* Now we try the default method: binary search in the sorted
769 array of messages. */
771 top
= domain
->nstrings
;
776 act
= (bottom
+ top
) / 2;
777 cmp_val
= strcmp (msgid
, (domain
->data
778 + W (domain
->must_swap
,
779 domain
->orig_tab
[act
].offset
)));
782 else if (cmp_val
> 0)
788 /* If a translation is found, return it. */
789 return bottom
>= top
? NULL
: ((char *) domain
->data
790 + W (domain
->must_swap
,
791 domain
->trans_tab
[act
].offset
));
795 /* Function to evaluate the plural expression and return an index value. */
796 static unsigned long int
798 plural_eval (struct expression
*pexp
, unsigned long int n
)
800 switch (pexp
->operation
)
805 return pexp
->val
.num
;
807 return (plural_eval (pexp
->val
.args2
.left
, n
)
808 * plural_eval (pexp
->val
.args2
.right
, n
));
810 return (plural_eval (pexp
->val
.args2
.left
, n
)
811 / plural_eval (pexp
->val
.args2
.right
, n
));
813 return (plural_eval (pexp
->val
.args2
.left
, n
)
814 % plural_eval (pexp
->val
.args2
.right
, n
));
816 return (plural_eval (pexp
->val
.args2
.left
, n
)
817 + plural_eval (pexp
->val
.args2
.right
, n
));
819 return (plural_eval (pexp
->val
.args2
.left
, n
)
820 - plural_eval (pexp
->val
.args2
.right
, n
));
822 return (plural_eval (pexp
->val
.args2
.left
, n
)
823 == plural_eval (pexp
->val
.args2
.right
, n
));
825 return (plural_eval (pexp
->val
.args2
.left
, n
)
826 != plural_eval (pexp
->val
.args2
.right
, n
));
828 return (plural_eval (pexp
->val
.args2
.left
, n
)
829 && plural_eval (pexp
->val
.args2
.right
, n
));
831 return (plural_eval (pexp
->val
.args2
.left
, n
)
832 || plural_eval (pexp
->val
.args2
.right
, n
));
834 return (plural_eval (pexp
->val
.args3
.bexp
, n
)
835 ? plural_eval (pexp
->val
.args3
.tbranch
, n
)
836 : plural_eval (pexp
->val
.args3
.fbranch
, n
));
843 /* Return string representation of locale CATEGORY. */
846 category_to_name (category
)
855 retval
= "LC_COLLATE";
865 retval
= "LC_MONETARY";
870 retval
= "LC_NUMERIC";
880 retval
= "LC_MESSAGES";
885 retval
= "LC_RESPONSE";
890 /* This might not make sense but is perhaps better than any other
896 /* If you have a better idea for a default value let me know. */
903 /* Guess value of current locale from value of the environment variables. */
906 guess_category_value (category
, categoryname
)
908 const char *categoryname
;
912 /* The highest priority value is the `LANGUAGE' environment
913 variable. This is a GNU extension. */
914 retval
= getenv ("LANGUAGE");
915 if (retval
!= NULL
&& retval
[0] != '\0')
918 /* `LANGUAGE' is not set. So we have to proceed with the POSIX
919 methods of looking to `LC_ALL', `LC_xxx', and `LANG'. On some
920 systems this can be done by the `setlocale' function itself. */
921 #if defined HAVE_SETLOCALE && defined HAVE_LC_MESSAGES && defined HAVE_LOCALE_NULL
922 return setlocale (category
, NULL
);
924 /* Setting of LC_ALL overwrites all other. */
925 retval
= getenv ("LC_ALL");
926 if (retval
!= NULL
&& retval
[0] != '\0')
929 /* Next comes the name of the desired category. */
930 retval
= getenv (categoryname
);
931 if (retval
!= NULL
&& retval
[0] != '\0')
934 /* Last possibility is the LANG environment variable. */
935 retval
= getenv ("LANG");
936 if (retval
!= NULL
&& retval
[0] != '\0')
939 /* We use C as the default domain. POSIX says this is implementation
945 /* @@ begin of epilog @@ */
947 /* We don't want libintl.a to depend on any other library. So we
948 avoid the non-standard function stpcpy. In GNU C Library this
949 function is available, though. Also allow the symbol HAVE_STPCPY
951 #if !_LIBC && !HAVE_STPCPY
957 while ((*dest
++ = *src
++) != '\0')
963 #if !_LIBC && !HAVE_MEMPCPY
/* Fallback for systems whose C library lacks mempcpy.

   Copy N bytes from SRC to DEST (the regions must not overlap, as with
   memcpy) and return a pointer to the byte following the last byte
   written, i.e. DEST + N.  This lets callers chain copies when building
   a string piece by piece.  */
static void *
mempcpy (void *dest, const void *src, size_t n)
{
  /* memcpy returns DEST; advance it past the bytes just copied.  */
  return (void *) ((char *) memcpy (dest, src, n) + n);
}
976 /* If we want to free all resources we have to do some work at
978 static void __attribute__ ((unused
))
981 struct binding
*runp
;
983 for (runp
= _nl_domain_bindings
; runp
!= NULL
; runp
= runp
->next
)
985 free (runp
->domainname
);
986 if (runp
->dirname
!= _nl_default_dirname
)
987 /* Yes, this is a pointer comparison. */
988 free (runp
->dirname
);
991 if (_nl_current_default_domain
!= _nl_default_default_domain
)
992 /* Yes, again a pointer comparison. */
993 free ((char *) _nl_current_default_domain
);
995 /* Remove the search tree with the known translations. */
996 __tdestroy (root
, free
);
999 text_set_element (__libc_subfreeres
, free_mem
);