[PATCH] intl: Handle translation output codesets with TRANSLIT or IGNORE suffixes
Arjun Shankar
arjun.is@lostca.se
Fri Sep 25 12:09:05 GMT 2020
From: Arjun Shankar <arjun@redhat.com>
Commit 91927b7c7643 (Rewrite iconv option parsing [BZ #19519]) did not
handle cases where the output codeset for translations (via the `gettext'
family of functions) might have a caller specified encoding suffix such as
TRANSLIT or IGNORE. This led to a regression where translations did not
work when the codeset had a suffix.
This commit fixes the above issue by parsing any suffixes passed to
__dcigettext and adds two new test-cases to intl/tst-codeset.c to
verify correct behaviour. The iconv-internal function __gconv_create_spec
and the static iconv-internal function gconv_destroy_spec are now visible
internally within glibc and used in intl/dcigettext.c.
---
iconv/Versions | 4 +++-
iconv/gconv_charset.c | 10 ++++++++++
iconv/gconv_charset.h | 27 ---------------------------
iconv/gconv_int.h | 21 +++++++++++++++++++++
iconv/iconv_open.c | 2 +-
iconv/iconv_prog.c | 2 +-
intl/dcigettext.c | 17 ++++++++++-------
intl/tst-codeset.c | 34 ++++++++++++++--------------------
support/.check.h.swp | Bin 0 -> 16384 bytes
9 files changed, 60 insertions(+), 57 deletions(-)
create mode 100644 support/.check.h.swp
diff --git a/iconv/Versions b/iconv/Versions
index 8a5f4cf780..d51af52fa3 100644
--- a/iconv/Versions
+++ b/iconv/Versions
@@ -6,7 +6,9 @@ libc {
GLIBC_PRIVATE {
# functions shared with iconv program
__gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
- __gconv_open; __gconv_create_spec;
+
+ # functions used elsewhere in glibc
+ __gconv_open; __gconv_create_spec; __gconv_destroy_spec;
# function used by the gconv modules
__gconv_transliterate;
diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
index 6ccd0773cc..4ba0aa99f5 100644
--- a/iconv/gconv_charset.c
+++ b/iconv/gconv_charset.c
@@ -216,3 +216,13 @@ out:
return ret;
}
libc_hidden_def (__gconv_create_spec)
+
+
+void
+__gconv_destroy_spec (struct gconv_spec *conv_spec)
+{
+ free (conv_spec->fromcode);
+ free (conv_spec->tocode);
+ return;
+}
+libc_hidden_def (__gconv_destroy_spec)
diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
index b39b09aea1..e9c122cf7e 100644
--- a/iconv/gconv_charset.h
+++ b/iconv/gconv_charset.h
@@ -48,33 +48,6 @@
#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
-/* This function accepts the charset names of the source and destination of the
- conversion and populates *conv_spec with an equivalent conversion
- specification that may later be used by __gconv_open. The charset names
- might contain options in the form of suffixes that alter the conversion,
- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
- and truncating any suffix options in fromcode, and processing and truncating
- any suffix options in tocode. Supported suffix options ("TRANSLIT" or
- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
- to be set to true. Unrecognized suffix options are silently discarded. If
- the function succeeds, it returns conv_spec back to the caller. It returns
- NULL upon failure. */
-struct gconv_spec *
-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
- const char *tocode);
-libc_hidden_proto (__gconv_create_spec)
-
-
-/* This function frees all heap memory allocated by __gconv_create_spec. */
-static void __attribute__ ((unused))
-gconv_destroy_spec (struct gconv_spec *conv_spec)
-{
- free (conv_spec->fromcode);
- free (conv_spec->tocode);
- return;
-}
-
-
/* This function copies in-order, characters from the source 's' that are
either alpha-numeric or one in one of these: "_-.,:/" - into the destination
'wp' while dropping all other characters. In the process, it converts all
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index e86938dae7..f721ce30ff 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
__gconv_t *handle, int flags);
libc_hidden_proto (__gconv_open)
+/* This function accepts the charset names of the source and destination of the
+ conversion and populates *conv_spec with an equivalent conversion
+ specification that may later be used by __gconv_open. The charset names
+ might contain options in the form of suffixes that alter the conversion,
+ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
+ and truncating any suffix options in fromcode, and processing and truncating
+ any suffix options in tocode. Supported suffix options ("TRANSLIT" or
+ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
+ to be set to true. Unrecognized suffix options are silently discarded. If
+ the function succeeds, it returns conv_spec back to the caller. It returns
+ NULL upon failure. */
+extern struct gconv_spec *
+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
+ const char *tocode);
+libc_hidden_proto (__gconv_create_spec)
+
+/* This function frees all heap memory allocated by __gconv_create_spec. */
+extern void
+__gconv_destroy_spec (struct gconv_spec *conv_spec);
+libc_hidden_proto (__gconv_destroy_spec)
+
/* Free resources associated with transformation descriptor CD. */
extern int __gconv_close (__gconv_t cd)
attribute_hidden;
diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c
index dd54bc12e0..5b30055c04 100644
--- a/iconv/iconv_open.c
+++ b/iconv/iconv_open.c
@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)
int res = __gconv_open (&conv_spec, &cd, 0);
- gconv_destroy_spec (&conv_spec);
+ __gconv_destroy_spec (&conv_spec);
if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
{
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index b4334faa57..d59979759c 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -184,7 +184,7 @@ main (int argc, char *argv[])
/* Let's see whether we have these coded character sets. */
res = __gconv_open (&conv_spec, &cd, 0);
- gconv_destroy_spec (&conv_spec);
+ __gconv_destroy_spec (&conv_spec);
if (res != __GCONV_OK)
{
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index 2e7c662bc7..bd332e71da 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
# ifdef _LIBC
- struct gconv_spec conv_spec
- = { .fromcode = norm_add_slashes (charset, ""),
- .tocode = norm_add_slashes (outcharset, ""),
- /* We always want to use transliteration. */
- .translit = true,
- .ignore = false
- };
+ struct gconv_spec conv_spec;
+
+ __gconv_create_spec (&conv_spec, charset, outcharset);
+
+ /* We always want to use transliteration. */
+ conv_spec.translit = true;
+
int r = __gconv_open (&conv_spec, &convd->conv,
GCONV_AVOID_NOCONV);
+
+ __gconv_destroy_spec (&conv_spec);
+
if (__builtin_expect (r != __GCONV_OK, 0))
{
/* If the output encoding is the same there is
diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c
index fd70432eca..c13f791641 100644
--- a/intl/tst-codeset.c
+++ b/intl/tst-codeset.c
@@ -22,13 +22,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <support/check.h>
static int
do_test (void)
{
- char *s;
- int result = 0;
-
unsetenv ("LANGUAGE");
unsetenv ("OUTPUT_CHARSET");
setlocale (LC_ALL, "de_DE.ISO-8859-1");
@@ -36,25 +34,21 @@ do_test (void)
bindtextdomain ("codeset", OBJPFX "domaindir");
/* Here we expect output in ISO-8859-1. */
- s = gettext ("cheese");
- if (strcmp (s, "K\344se"))
- {
- printf ("call 1 returned: %s\n", s);
- result = 1;
- }
+ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");
+ /* Here we expect output in UTF-8. */
bind_textdomain_codeset ("codeset", "UTF-8");
+ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");
- /* Here we expect output in UTF-8. */
- s = gettext ("cheese");
- if (strcmp (s, "K\303\244se"))
- {
- printf ("call 2 returned: %s\n", s);
- result = 1;
- }
-
- return result;
+ /* The accented `a' is transliterated. */
+ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
+
+ /* Transliteration also works by default even if not set. */
+ bind_textdomain_codeset ("codeset", "ASCII");
+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
+
+ return 0;
}
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
+#include <support/test-driver.c>
diff --git a/support/.check.h.swp b/support/.check.h.swp
new file mode 100644
index 0000000000000000000000000000000000000000..bec356c03cf91a9c00655cbf5b29d40164ea58d7
GIT binary patch
literal 16384
zcmeHNON=8|6)gw@BqV@fM<jG-8YcG0*xfxL1L<MF?QwToc-$@9-IJC^D$3<oc4^90
zR(-nLM8E=sf&@qi7O)_T2#5kw!cVh6z#4W)ksuq8kRTEf8v+|7BsllIDtFoKPbXQR
zNM-5BpI7gG&b#l`y|>(1J*eHHj}_N7yxyy67oWMUzkKmq+Pxn?tQkSyaPJyE4=s9~
zteJ-2@!Un%@Yv@Qfu+DVOFhT#n57^Z4n03CnSEj2E%uj*$i_1UG6r5Z0}p9e*Vk5g
z`pTt?^#1qlzivs{#EgNAfsBESfsBESfsBESfsBESfj1um;qX4~3o!ewvFS-g@Rs;{
z^8VlWwHg0@Fg1Q({5>DPe|u^?{+hjI3}g&s3}g&s3}g&s3}g&s3}g&s3}g&s3}g)a
z9~dyYn)WW}eUb}(JpWJX{}<n*Y2N`{U;sP<{O#SE_8edW9pD4NOAl$<H-XOpI`A;?
z@`IZ8Yv5PF_kphgUj-r{1S~)R9|Z0P{s9Ai1pE-NfJ?vy;4cqo+S9;yfdk-Ez-8dC
z_hSrr8h8rm1DAmh1Als_ru`21I#3550DkihP5Ux%6Zj->1NhI|HSHI`3Gf*3uM3*?
zQ(y=bfjp1{9sw=`ue?puegceu9pFLWMKnFW0UQ7y0$xHB<r&~hz!!lkz>Sq(f?MYI
zvq>{WtuqlF*@4}01ce@jeL+LR7j8&H-!nxJkl}VI6hTN`-#!w4k*M9b1F|C53~l_~
z0s;nN;Q1pO7{j6M_6iiRAR{Epu${;k0r`UeULm?<yJYrlr<+K}7__mBQ1}DeHA0xd
zHYnv%nMmZ2=|wKCBd8BT&sXvc!XHHNSu0(krZ7FfONKHyRzGbqtO+CERi1S{nBaP$
z1{-wU2t(iQM4`}i%H>?B;4Aa_4YqLU%k*5(WWpM{B2I>WUOV#aF2#Q?(V0XY4BFO6
z7bkY8+k>G)IU8IDp%F$w0S=x@uxv*ZKv+nQ?TUi7M16XyRK77^yHcE<aaP$pG74bJ
zbt4CsujdP7dHw(^og4bF)GE-U1^Q_IJcLbXmqH3uEEe<2{9MS<1mBY5I0jN&%b#Py
zq3o=mv3F}Tyt@hq-<WrDwbj3kbjg`MY2gGdQxSj-OZ2%@hH`5Kn7>u4S9P5~)oTs;
zX+^F7+<>_gbG$e(=T66fk|3NwEV^pZSYpG*qKg)eDG9bkIqB}gYiAF#gxr&}$Ikcb
z9c`s6ENo9|S6glUpxUf$9qQF5YVGqHb!YCeB@bQ>R)UY!C-#~ojtnQl2^sK7Z5d!6
zN86RsL{!XLQ?=cfMM}1&YRzY^>3Nq^ijYjE)@R<0iE_Br@Oc*6ZX`&Y2s6URUVi3^
z^D-x^JL71(c5U`ypz8-^z1-Yx>3V(~ty|?<UEgoC_V@O7o9*h&9G|>uSF4keJd3Vf
z%d7Z;{d;`8kMF|+<ON1g#Hm4Y-#)g(J{bW;Zfqv*&oXfUdA7X4E--)2Q8wRt-ehYj
zyX8)ajb9tvJu@LFAiZFBIMIktU!*YeTw20=a_gS8y%t-aOQxQZkBkFbz1B{zjq(M7
zb?VD$g2a5Y3!DmB)M}U8`z=n7T!F<vrX+>6_A8ZYtHmdcTF2#%r*bfo>#9T-nGBT_
zjbm}vUF@}y(X6&p3(5Irtl|x<BTh)r>vj#KgI*L4BLtwXZ&tT!4gF@dQg7kUhqh}v
zQCHCQU=);09OOZ<e*?eAM|!+F^vX`P@@ahs*IO=zX!>_hLf89YI1C;ym5z^(i#<0g
zdVa6u*rsp;5tMF>*RZaBBE#|Ao?@1%-frwuh3a<4H~i5CFC|h^<k3Q=#)n>n9M_AG
zHusGq0d`D*RGjl%(;JR>uO>q32xBb!ws3`SIJ6gaU<5B3%^ZPBX(Fd_gdAQ_SA+<s
zpvY5hRhyNaa-&_|tkrAnL&7y%wRQucL|eN}D$`!M*{)Ug>*Xfx?Kk&!TU9vP5-Np<
zPb`FwxZAb%&hCDj%8f(1U2ZlZ^Ke6YkEOwFO}NqlzE9b3>)7yp!wuOnme8Iihy%Q9
z2XU${iwgF??+sZqvQgYKk8NbA9YIkbtjH-y3XppTs90#dxR!7h`(I$+a+o>tBA>h=
z7dCkuZzFPWzx64eDOpJ<!_FHry+I!)cSh2KEg#OeJS&6?g0@hubPblZL25s;I|Hwa
z3V?0IC}gwgT>J!yO&km}S8FaA7OM?nwHxdNnTCrvIc16UVLOyO{Er?+s1gq626Ksm
zFH9>RN22t%f)MlVULQ$*B~R<CYma?oeRX}6=J{HnnrjyMu-S@BM8mPMm>goN4#X`^
z-55|BMox%e<NE)%sE5CTI+^SLysAyLd#Lvv;1ci$)cL;$z6S_k9e5RW{yo3~O29u+
z$G-}^0Q7-8@Cs`9XMry8C~y_{JL>o6fe3gU_&9I@_#<lf7l7x19`Fg^I`D7U^=IJc
zz|VkZfgb>D?^8hb${5HP$QZ~N$QZ~NcoQ?QB3xu=gydx^%a?r?U9UE7CgnrAlZT8P
zedp6wULZdrFU)aAQniocN)u_G&)I#dL{;7B&_@pdW3q98JbcKeAX7rZdfXSt5@o80
zv{Kdp9l=R6I?CE)n|I0*WLN@;TWkf~s&NC|MQM=q?>R$8osNz@_xL#(O-f=VicPS5
zV<3(_|E@9`3FLIw^jH{o3!J#SdgQW1UFCM%37#a19|R;srJLV*&KS>HXBrL{w{d<g
zZ5%&`P^~XyvkPD8j;hRK(R@d5{yZ)H^i)S!p`5w26Ax?B6Q^726Fq8nNH@I!KXcK!
zq5<2v^lVl;`CLHyP>@;tLcdqk)D?Msp1a1mr2jHgtGZlCpV?NPYK&EOZ=wFH>aBLO
z*4WOarswC{B|BrjFapmtI?jlzT9h6gVH$`Tj`V>Y;BkW4FHpzhZW6*>_7_BVTtXWp
zpFRvd#1;00d_0Ay#pU?Mtq-}k+W1NFiDyNscJ8R>Onotua<R`X!B4s%x$NMPl`2lg
z;g4g$Lt{5Ct_{Bz4RCP)6&$G*wPtd|Bzo%At+uqMS=-)GpJJJl52R4vFJHcN5U8+q
zgv$YVj>tVb7xR|KZF(9nCu^IGJ^5>F$7_=}y$j}sNIZZhqTO?`^JSxikB~75y|8~?
z-Eyy8Q8A4Tu=sUWW#X>XIDg-E9Iun?TBT8LhGlD<Y|yk8Qroo<zw6){%H{4|Wc;jt
q+wAM*Gmn|)fbv3GP7@}@N5TaE5I*NP*sSkv#)rW&Dako;NBbW?lfZuf
literal 0
HcmV?d00001
--
2.26.2
More information about the Libc-alpha
mailing list