[PING] [PATCH v3] localedata: Translit common emojis to smileys [BZ #30649]

Colin Leroy-Mira colin@colino.net
Wed Aug 16 08:33:58 GMT 2023


On 08 August 2023 at 15h52, Colin Leroy-Mira wrote:

Hi, 
ping :)
Thanks in advance,
Colin

> Add common emojis (mostly faces and hearts) to the set of
> characters that can be transliterated, and transliterate
> them to old-fashioned ASCII smileys.
> 
> Author: Colin Leroy-Mira <colin@colino.net>
> Signed-off-by: Colin Leroy-Mira <colin@colino.net>
> ---
> v3: Directly use UTF-8 in translit data and test, for legibility
> v2: Fix a wrong smiley, add unit test
>  localedata/Makefile                 |   3 +
>  localedata/locales/translit_emojis  |  90 ++++++++++++++++++++++
>  localedata/locales/translit_neutral |   1 +
>  localedata/tst-iconv-emojis-trans.c | 115 ++++++++++++++++++++++++++++
>  4 files changed, 209 insertions(+)
>  create mode 100644 localedata/locales/translit_emojis
>  create mode 100644 localedata/tst-iconv-emojis-trans.c
> 
> diff --git a/localedata/Makefile b/localedata/Makefile
> index 3619b6d47e..5b6d10e33f 100644
> --- a/localedata/Makefile
> +++ b/localedata/Makefile
> @@ -164,6 +164,7 @@ tests = \
>    bug-usesetlocale \
>    tst-c-utf8-consistency \
>    tst-digits \
> +  tst-iconv-emojis-trans \
>    tst-iconv-math-trans \
>    tst-leaks \
>    tst-mbswcs1 \
> @@ -320,6 +321,8 @@ LOCALES := \
>  
>  include ../gen-locales.mk
>  
> +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
> +
>  $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
>  endif
>  
> diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
> new file mode 100644
> index 0000000000..6bcaf7a02e
> --- /dev/null
> +++ b/localedata/locales/translit_emojis
> @@ -0,0 +1,90 @@
> +escape_char /
> +comment_char %
> +
> +% This file is part of the GNU C Library and contains locale data.
> +% The Free Software Foundation does not claim any copyright interest
> +% in the locale data contained in this file.  The foregoing does not
> +% affect the license of the GNU C Library as a whole.  It does not
> +% exempt you from the conditions of the license if your use would
> +% otherwise be governed by that license.
> +
> +% Transliterations of emojis to ASCII smileys.
> +
> +LC_CTYPE
> +
> +translit_start
> +
> +♡ "/<3" % WHITE HEART SUIT
> +♥ "/<3" % BLACK HEART SUIT
> +❤ "/<3" % HEAVY BLACK HEART
> +💙 "/<3" % BLUE HEART
> +💓 "/<3" % BEATING HEART
> +💔 "/<//3" % BROKEN HEART
> +💖 "/<3" % SPARKLING HEART
> +💗 "/<3" % GROWING HEART
> +💚 "/<3" % GREEN HEART
> +💛 "/<3" % YELLOW HEART
> +💜 "/<3" % PURPLE HEART
> +🖤 "/<3" % BLACK HEART
> +🧡 "/<3" % ORANGE HEART
> +🤍 "/<3" % WHITE HEART
> +🤎 "/<3" % BROWN HEART
> +😀 ":-D" % GRINNING FACE
> +😁 ":-D" % GRINNING FACE WITH SMILING EYES
> +😂 ":'D" % FACE WITH TEARS OF JOY
> +😃 ":-D" % SMILING FACE WITH OPEN MOUTH (cf. ☺)
> +😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
> +😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
> +😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
> +😇 "O:-)" % SMILING FACE WITH HALO
> +😈 "/>:)" % SMILING FACE WITH HORNS
> +😉 ";-)" % WINKING FACE
> +😊 ":-)" % SMILING FACE WITH SMILING EYES
> +😋 ":-P" % FACE SAVOURING DELICIOUS FOOD
> +😌 ":-)" % RELIEVED FACE
> +😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES
> +😎 "B-)" % SMILING FACE WITH SUNGLASSES
> +😏 ";-)" % SMIRKING FACE
> +😐 ":-|" % NEUTRAL FACE
> +😑 ":-|" % EXPRESSIONLESS FACE
> +😒 ":-|" % UNAMUSED FACE
> +😓 ":'-|" % FACE WITH COLD SWEAT
> +😔 ":-|" % PENSIVE FACE
> +😕 ":-//" % CONFUSED FACE
> +😖 ":-S" % CONFOUNDED FACE
> +😗 ":-*" % KISSING FACE
> +😘 ":-*" % FACE THROWING A KISS
> +😙 ":-*" % KISSING FACE WITH SMILING EYES
> +😚 ":-*" % KISSING FACE WITH CLOSED EYES
> +😛 ":-P" % FACE WITH STUCK-OUT TONGUE
> +😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
> +😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
> +😞 ":-(" % DISAPPOINTED FACE
> +😟 ":-(" % WORRIED FACE
> +😠 "/>:-(" % ANGRY FACE
> +😡 ":-(" % POUTING FACE
> +😢 ":'-(" % CRYING FACE
> +😣 "X-(" % PERSEVERING FACE
> +😦 ":-O" % FROWNING FACE WITH OPEN MOUTH
> +😧 ":-O" % ANGUISHED FACE
> +😨 ":-O" % FEARFUL FACE
> +😩 ":-O" % WEARY FACE
> +😭 ":<U0022>-(" % LOUDLY CRYING FACE
> +😮 ":-O" % FACE WITH OPEN MOUTH
> +😯 ":-O" % HUSHED FACE
> +😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT
> +😱 ":-O" % FACE SCREAMING IN FEAR
> +😲 ":-O" % ASTONISHED FACE
> +😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES
> +😹 ":'-3" % CAT FACE WITH TEARS OF JOY
> +😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH
> +😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPED EYES
> +😼 ";-3" % CAT FACE WITH WRY SMILE
> +😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES
> +🙁 ":-(" % SLIGHTLY FROWNING FACE
> +🙂 ":-)" % SLIGHTLY SMILING FACE
> +🙃 "(-:" % UPSIDE-DOWN FACE
> +
> +translit_end
> +
> +END LC_CTYPE
> diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
> index 72f66220b7..57412ae565 100644
> --- a/localedata/locales/translit_neutral
> +++ b/localedata/locales/translit_neutral
> @@ -17,6 +17,7 @@ translit_start
>  include "translit_circle";""
>  include "translit_cjk_compat";""
>  include "translit_compat";""
> +include "translit_emojis";""
>  include "translit_font";""
>  include "translit_fraction";""
>  include "translit_narrow";""
> diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
> new file mode 100644
> index 0000000000..dda9d3fd5c
> --- /dev/null
> +++ b/localedata/tst-iconv-emojis-trans.c
> @@ -0,0 +1,115 @@
> +/* Test some emoji transliterations
> +
> +   Copyright (C) 2019-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <iconv.h>
> +#include <locale.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <support/check.h>
> +
> +static int
> +do_test (void)
> +{
> +  iconv_t cd;
> +
> +  const int num_emojis = 70;
> +
> +  const char str[] = "♡ ♥ ❤ 💙 💓 "
> +                     "💔 💖 💗 💚 💛 "
> +                     "💜 🖤 🧡 🤍 🤎 "
> +                     "😀 😁 😂 😃 😄 "
> +                     "😅 😆 😇 😈 😉 "
> +                     "😊 😋 😌 😍 😎 "
> +                     "😏 😐 😑 😒 😓 "
> +                     "😔 😕 😖 😗 😘 "
> +                     "😙 😚 😛 😜 😝 "
> +                     "😞 😟 😠 😡 😢 "
> +                     "😣 😦 😧 😨 😩 "
> +                     "😭 😮 😯 😰 😱 "
> +                     "😲 😸 😹 😺 😻 "
> +                     "😼 😽 🙁 🙂 🙃";
> +
> +  const char expected[] = "<3 <3 <3 <3 <3 "
> +                          "</3 <3 <3 <3 <3 "
> +                          "<3 <3 <3 <3 <3 "
> +                          ":-D :-D :'D :-D :-D "
> +                          ":-D :-D O:-) >:) ;-) "
> +                          ":-) :-P :-) :-* B-) "
> +                          ";-) :-| :-| :-| :'-| "
> +                          ":-| :-/ :-S :-* :-* "
> +                          ":-* :-* :-P ;-P X-P "
> +                          ":-( :-( >:-( :-( :'-( "
> +                          "X-( :-O :-O :-O :-O "
> +                          ":\"-( :-O :-O :'-O :-O "
> +                          ":-O :-3 :'-3 :-3 :-3 "
> +                          ";-3 :-3 :-( :-) (-:";
> +
> +  char *inptr = (char *) str;
> +  size_t inlen = strlen (str) + 1;
> +  char outbuf[500];
> +  char *outptr = outbuf;
> +  size_t outlen = sizeof (outbuf);
> +  int result = 0;
> +  size_t n;
> +
> +  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
> +    FAIL_EXIT1 ("setlocale failed");
> +
> +  cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
> +  if (cd == (iconv_t) -1)
> +    FAIL_EXIT1 ("iconv_open failed");
> +
> +  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
> +  if (n != num_emojis)
> +    {
> +      if (n == (size_t) -1)
> +        printf ("iconv() returned error: %m\n");
> +      else
> +        printf ("iconv() returned %zd, expected %d\n", n, num_emojis);
> +      result = 1;
> +    }
> +  if (inlen != 0)
> +    {
> +      puts ("not all input consumed");
> +      result = 1;
> +    }
> +  else if (inptr - str != strlen (str) + 1)
> +    {
> +      printf ("inptr wrong, advanced by %td\n", inptr - str);
> +      result = 1;
> +    }
> +  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
> +    {
> +      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
> +              (int) (sizeof (outbuf) - outlen), outbuf, expected);
> +      result = 1;
> +    }
> +  else if (outlen != sizeof (outbuf) - sizeof (expected))
> +    {
> +      printf ("outlen wrong: %zd, expected %zd\n", outlen,
> +              sizeof (outbuf) - sizeof (expected));
> +      result = 1;
> +    }
> +  else
> +    printf ("output is \"%s\" which is OK\n", outbuf);
> +
> +  return result;
> +}
> +
> +#include <support/test-driver.c>



-- 
Colin


More information about the Libc-alpha mailing list