]> sourceware.org Git - glibc.git/blame - iconv/gconv_trans.c
syslog: Fix integer overflow in __vsyslog_internal (CVE-2023-6780)
[glibc.git] / iconv / gconv_trans.c
CommitLineData
55985355 1/* Transliteration using the locale's data.
dff8da6b 2 Copyright (C) 2000-2024 Free Software Foundation, Inc.
55985355 3 This file is part of the GNU C Library.
55985355
UD
4
5 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
55985355
UD
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 13 Lesser General Public License for more details.
55985355 14
41bdb6e2 15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
55985355 18
d6204268 19#include <assert.h>
f1d5c60d 20#include <dlfcn.h>
d6204268 21#include <search.h>
55985355 22#include <stdint.h>
d6204268 23#include <string.h>
7884bf47 24#include <stdlib.h>
55985355 25
ec999b8e 26#include <libc-lock.h>
55985355
UD
27#include "gconv_int.h"
28#include "../locale/localeinfo.h"
88f4b692 29#include <pointer_guard.h>
55985355
UD
30
31
32int
f1d5c60d
UD
33__gconv_transliterate (struct __gconv_step *step,
34 struct __gconv_step_data *step_data,
35 const unsigned char *inbufstart,
36 const unsigned char **inbufp,
37 const unsigned char *inbufend,
38 unsigned char **outbufstart, size_t *irreversible)
55985355
UD
39{
40 /* Find out about the locale's transliteration. */
535e935a 41 uint32_t size;
17427edd
UD
42 const uint32_t *from_idx;
43 const uint32_t *from_tbl;
44 const uint32_t *to_idx;
45 const uint32_t *to_tbl;
46 const uint32_t *winbuf;
47 const uint32_t *winbufend;
535e935a
NG
48 uint32_t low;
49 uint32_t high;
55985355 50
d5055a20 51 /* The input buffer. There are actually 4-byte values. */
17427edd
UD
52 winbuf = (const uint32_t *) *inbufp;
53 winbufend = (const uint32_t *) inbufend;
d5055a20 54
1911b455 55 __gconv_fct fct = step->__fct;
1911b455
UD
56 if (step->__shlib_handle != NULL)
57 PTR_DEMANGLE (fct);
1911b455 58
55985355
UD
59 /* If there is no transliteration information in the locale don't do
60 anything and return the error. */
04fbc779 61 size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
55985355 62 if (size == 0)
1d96d74d 63 goto no_rules;
55985355 64
f1d5c60d 65 /* Get the rest of the values. */
17427edd
UD
66 from_idx =
67 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
68 from_tbl =
69 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
70 to_idx =
71 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
72 to_tbl =
73 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
f1d5c60d 74
f1d5c60d
UD
75 /* Test whether there is enough input. */
76 if (winbuf + 1 > winbufend)
77 return (winbuf == winbufend
78 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
79
14ef9c18
DL
80 /* The array starting at FROM_IDX contains indices to the string table
81 in FROM_TBL. The indices are sorted wrt to the strings. I.e., we
f1d5c60d
UD
82 are doing binary search. */
83 low = 0;
84 high = size;
85 while (low < high)
86 {
535e935a 87 uint32_t med = (low + high) / 2;
f1d5c60d
UD
88 uint32_t idx;
89 int cnt;
90
91 /* Compare the string at this index with the string at the current
92 position in the input buffer. */
93 idx = from_idx[med];
94 cnt = 0;
95 do
96 {
97 if (from_tbl[idx + cnt] != winbuf[cnt])
98 /* Does not match. */
99 break;
100 ++cnt;
101 }
102 while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
103
104 if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
105 {
106 /* Found a matching input sequence. Now try to convert the
107 possible replacements. */
108 uint32_t idx2 = to_idx[med];
109
110 do
111 {
112 /* Determine length of replacement. */
535e935a 113 unsigned int len = 0;
f1d5c60d
UD
114 int res;
115 const unsigned char *toinptr;
403cb8a1 116 unsigned char *outptr;
f1d5c60d
UD
117
118 while (to_tbl[idx2 + len] != L'\0')
119 ++len;
120
121 /* Try this input text. */
122 toinptr = (const unsigned char *) &to_tbl[idx2];
403cb8a1 123 outptr = *outbufstart;
1911b455 124 res = DL_CALL_FCT (fct,
f1d5c60d
UD
125 (step, step_data, &toinptr,
126 (const unsigned char *) &to_tbl[idx2 + len],
403cb8a1 127 &outptr, NULL, 0, 0));
f1d5c60d
UD
128 if (res != __GCONV_ILLEGAL_INPUT)
129 {
130 /* If the conversion succeeds we have to increment the
131 input buffer. */
132 if (res == __GCONV_EMPTY_INPUT)
133 {
134 *inbufp += cnt * sizeof (uint32_t);
135 ++*irreversible;
a8e4c924 136 res = __GCONV_OK;
f1d5c60d 137 }
1b14353e
UD
138 /* Do not increment the output pointer if we could not
139 store the entire output. */
140 if (res != __GCONV_FULL_OUTPUT)
141 *outbufstart = outptr;
f1d5c60d
UD
142
143 return res;
144 }
145
146 /* Next replacement. */
147 idx2 += len + 1;
148 }
149 while (to_tbl[idx2] != L'\0');
150
151 /* Nothing found, continue searching. */
152 }
a8e4c924
UD
153 else if (cnt > 0)
154 /* This means that the input buffer contents matches a prefix of
155 an entry. Since we cannot match it unless we get more input,
156 we will tell the caller about it. */
157 return __GCONV_INCOMPLETE_INPUT;
f1d5c60d
UD
158
159 if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
04fbc779 160 low = med + 1;
f1d5c60d 161 else
04fbc779 162 high = med;
f1d5c60d
UD
163 }
164
1d96d74d 165 no_rules:
a8e4c924
UD
166 /* Maybe the character is supposed to be ignored. */
167 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
168 {
169 int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
17427edd
UD
170 const uint32_t *ranges =
171 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
172 const uint32_t wc = *(const uint32_t *) (*inbufp);
a8e4c924
UD
173 int i;
174
175 /* Test whether there is enough input. */
176 if (winbuf + 1 > winbufend)
177 return (winbuf == winbufend
178 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
179
180 for (i = 0; i < n; ranges += 3, ++i)
181 if (ranges[0] <= wc && wc <= ranges[1]
182 && (wc - ranges[0]) % ranges[2] == 0)
183 {
184 /* Matches the range. Ignore it. */
185 *inbufp += 4;
186 ++*irreversible;
187 return __GCONV_OK;
188 }
189 else if (wc < ranges[0])
190 /* There cannot be any other matching range since they are
191 sorted. */
192 break;
193 }
194
195 /* One last chance: use the default replacement. */
fb46e8d2 196 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
1d96d74d 197 {
17427edd 198 const uint32_t *default_missing = (const uint32_t *)
fb46e8d2 199 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
1d96d74d
UD
200 const unsigned char *toinptr = (const unsigned char *) default_missing;
201 uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
202 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
403cb8a1 203 unsigned char *outptr;
1d96d74d
UD
204 int res;
205
a8e4c924
UD
206 /* Test whether there is enough input. */
207 if (winbuf + 1 > winbufend)
208 return (winbuf == winbufend
209 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
210
403cb8a1 211 outptr = *outbufstart;
1911b455 212 res = DL_CALL_FCT (fct,
1d96d74d
UD
213 (step, step_data, &toinptr,
214 (const unsigned char *) (default_missing + len),
403cb8a1 215 &outptr, NULL, 0, 0));
1d96d74d
UD
216
217 if (res != __GCONV_ILLEGAL_INPUT)
218 {
219 /* If the conversion succeeds we have to increment the
220 input buffer. */
221 if (res == __GCONV_EMPTY_INPUT)
222 {
a8e4c924 223 /* This worked but is not reversible. */
1d96d74d 224 ++*irreversible;
a8e4c924
UD
225 *inbufp += 4;
226 res = __GCONV_OK;
1d96d74d 227 }
403cb8a1 228 *outbufstart = outptr;
1d96d74d
UD
229
230 return res;
231 }
232 }
233
f1d5c60d 234 /* Haven't found a match. */
55985355
UD
235 return __GCONV_ILLEGAL_INPUT;
236}
ba7b4d29 237libc_hidden_def (__gconv_transliterate)
This page took 0.524156 seconds and 5 git commands to generate.