]>
Commit | Line | Data |
---|---|---|
d614a753 | 1 | /* Copyright (C) 1996-2020 Free Software Foundation, Inc. |
df4ef2ab | 2 | This file is part of the GNU C Library. |
b6aa34eb | 3 | Contributed by Ulrich Drepper <drepper@redhat.com>, 1996. |
a641835a | 4 | |
43bc8ac6 | 5 | This program is free software; you can redistribute it and/or modify |
2e2efe65 RM |
6 | it under the terms of the GNU General Public License as published |
7 | by the Free Software Foundation; version 2 of the License, or | |
8 | (at your option) any later version. | |
a641835a | 9 | |
43bc8ac6 | 10 | This program is distributed in the hope that it will be useful, |
df4ef2ab | 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
43bc8ac6 UD |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU General Public License for more details. | |
a641835a | 14 | |
43bc8ac6 | 15 | You should have received a copy of the GNU General Public License |
5a82c748 | 16 | along with this program; if not, see <https://www.gnu.org/licenses/>. */ |
a641835a RM |
17 | |
18 | #ifdef HAVE_CONFIG_H | |
d1dc39a4 | 19 | # include "config.h" |
a641835a RM |
20 | #endif |
21 | ||
5a97622d | 22 | #include <argp.h> |
d2defdc4 | 23 | #include <assert.h> |
a641835a RM |
24 | #include <ctype.h> |
25 | #include <endian.h> | |
26 | #include <errno.h> | |
27 | #include <error.h> | |
28 | #include <fcntl.h> | |
d2defdc4 UD |
29 | #include <iconv.h> |
30 | #include <langinfo.h> | |
e75154a6 | 31 | #include <locale.h> |
a641835a RM |
32 | #include <libintl.h> |
33 | #include <limits.h> | |
34 | #include <nl_types.h> | |
35 | #include <obstack.h> | |
b6aa34eb | 36 | #include <stdint.h> |
a641835a RM |
37 | #include <stdio.h> |
38 | #include <stdlib.h> | |
39 | #include <string.h> | |
40 | #include <unistd.h> | |
d2defdc4 | 41 | #include <wchar.h> |
a641835a RM |
42 | |
43 | #include "version.h" | |
44 | ||
45 | #include "catgetsinfo.h" | |
46 | ||
47 | ||
/* Reverse the byte order of the 32-bit value W.

   The argument is narrowed to uint32_t before any shifting so that the
   macro is well defined for every integer argument: a signed int no
   longer overflows on the left shift (or sign-extends on the right
   shift), and a wider type no longer leaks bits above bit 31 into the
   result.  The result always has type uint32_t.  Note that W is
   evaluated more than once, so it must not have side effects.  */
#define SWAPU32(w) \
  ((uint32_t) ((((uint32_t) (w)) << 24)                                     \
               | ((((uint32_t) (w)) & 0xff00) << 8)                         \
               | ((((uint32_t) (w)) >> 8) & 0xff00)                         \
               | (((uint32_t) (w)) >> 24)))
50 | ||
/* One message of a message set.  The messages of a set are chained
   through NEXT in a singly linked list.  */
struct message_list
{
  /* Number under which the message is stored in its set.  */
  int number;
  /* Text of the message.  */
  const char *message;

  /* File name and line number where the message was found; they are
     used when a later conflicting definition is diagnosed.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the message, or NULL if it was given a number.  */
  const char *symbol;

  /* Following message in the same set, NULL at the end of the list.  */
  struct message_list *next;
};
62 | ||
63 | ||
/* One message set of a catalog.  All sets of a catalog are chained
   through NEXT in a singly linked list.  */
struct set_list
{
  /* Number of the set.  */
  int number;
  /* Non-zero once the set has been removed by a `$delset' directive.  */
  int deleted;
  /* Head of the list of messages which belong to this set.  */
  struct message_list *messages;
  /* Highest message number handed out or seen in this set so far.  */
  int last_message;

  /* File name and line number of the defining `$set' directive.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the set, or NULL if it was given a number.  */
  const char *symbol;

  /* Following set of the catalog, NULL at the end of the list.  */
  struct set_list *next;
};
77 | ||
78 | ||
/* State of the catalog which is being assembled from the input files.  */
struct catalog
{
  /* Head of the list of all message sets.  */
  struct set_list *all_sets;
  /* Set which receives the messages read next.  */
  struct set_list *current_set;
  /* Counter of message lines processed; used to pick the initial size
     of the hash table when the catalog is written.  */
  size_t total_messages;
  /* Quote character selected with `$quote'; zero means none.  */
  wint_t quote_char;
  /* Highest set number handed out or seen so far.  */
  int last_set;

  /* Obstack which provides the storage for the input lines.  */
  struct obstack mem_pool;
};
89 | ||
90 | ||
91 | /* If non-zero force creation of new file, not using existing one. */ | |
92 | static int force_new; | |
93 | ||
5a97622d UD |
94 | /* Name of output file. */ |
95 | static const char *output_name; | |
96 | ||
97 | /* Name of generated C header file. */ | |
98 | static const char *header_name; | |
99 | ||
100 | /* Name and version of program. */ | |
101 | static void print_version (FILE *stream, struct argp_state *state); | |
102 | void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; | |
103 | ||
104 | #define OPT_NEW 1 | |
105 | ||
106 | /* Definitions of arguments for argp functions. */ | |
107 | static const struct argp_option options[] = | |
108 | { | |
109 | { "header", 'H', N_("NAME"), 0, | |
110 | N_("Create C header file NAME containing symbol definitions") }, | |
111 | { "new", OPT_NEW, NULL, 0, | |
112 | N_("Do not use existing catalog, force new output file") }, | |
113 | { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") }, | |
114 | { NULL, 0, NULL, 0, NULL } | |
115 | }; | |
116 | ||
117 | /* Short description of program. */ | |
118 | static const char doc[] = N_("Generate message catalog.\ | |
119 | \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\ | |
120 | is -, output is written to standard output.\n"); | |
121 | ||
122 | /* Strings for arguments in help texts. */ | |
123 | static const char args_doc[] = N_("\ | |
124 | -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]"); | |
125 | ||
126 | /* Prototype for option handler. */ | |
127 | static error_t parse_opt (int key, char *arg, struct argp_state *state); | |
128 | ||
129 | /* Function to print some extra text in the help message. */ | |
130 | static char *more_help (int key, const char *text, void *input); | |
131 | ||
132 | /* Data structure to communicate with argp functions. */ | |
133 | static struct argp argp = | |
a641835a | 134 | { |
5a97622d | 135 | options, parse_opt, args_doc, doc, NULL, more_help |
a641835a RM |
136 | }; |
137 | ||
5a97622d | 138 | |
a641835a | 139 | /* Wrapper functions with error checking for standard functions. */ |
6ff444c4 | 140 | #include <programs/xmalloc.h> |
a641835a RM |
141 | |
/* Forward declarations of the functions local to this file.  */

/* Hook for the error functions.  */
static void error_print (void);

/* Reading of the message source files and of an existing catalog.  */
static struct catalog *read_input_file (struct catalog *current,
                                        const char *fname);
static void read_old (struct catalog *catalog, const char *file_name);

/* Helpers used while the input is processed.  */
static struct set_list *find_set (struct catalog *current, int number);
static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
                            iconv_t *cd_tombp, wchar_t *escape_charp);
static void normalize_line (const char *fname, size_t line, iconv_t cd,
                            wchar_t *string, wchar_t quote_char,
                            wchar_t escape_char);

/* Generation of the output.  */
static void write_out (struct catalog *result, const char *output_name,
                       const char *header_name);
a641835a RM |
155 | |
156 | ||
157 | int | |
158 | main (int argc, char *argv[]) | |
159 | { | |
160 | struct catalog *result; | |
2f6d1f1b | 161 | int remaining; |
a641835a RM |
162 | |
163 | /* Set program name for messages. */ | |
164 | error_print_progname = error_print; | |
165 | ||
166 | /* Set locale via LC_ALL. */ | |
167 | setlocale (LC_ALL, ""); | |
168 | ||
169 | /* Set the text message domain. */ | |
170 | textdomain (PACKAGE); | |
171 | ||
172 | /* Initialize local variables. */ | |
a641835a RM |
173 | result = NULL; |
174 | ||
5a97622d | 175 | /* Parse and process arguments. */ |
2f6d1f1b | 176 | argp_parse (&argp, argc, argv, 0, &remaining, NULL); |
a641835a RM |
177 | |
178 | /* Determine output file. */ | |
179 | if (output_name == NULL) | |
2f6d1f1b | 180 | output_name = remaining < argc ? argv[remaining++] : "-"; |
a641835a RM |
181 | |
182 | /* Process all input files. */ | |
183 | setlocale (LC_CTYPE, "C"); | |
2f6d1f1b | 184 | if (remaining < argc) |
a641835a | 185 | do |
2f6d1f1b UD |
186 | result = read_input_file (result, argv[remaining]); |
187 | while (++remaining < argc); | |
a641835a RM |
188 | else |
189 | result = read_input_file (NULL, "-"); | |
190 | ||
191 | /* Write out the result. */ | |
192 | if (result != NULL) | |
193 | write_out (result, output_name, header_name); | |
194 | ||
819c56e7 | 195 | return error_message_count != 0; |
a641835a RM |
196 | } |
197 | ||
198 | ||
5a97622d UD |
199 | /* Handle program arguments. */ |
200 | static error_t | |
201 | parse_opt (int key, char *arg, struct argp_state *state) | |
a641835a | 202 | { |
5a97622d | 203 | switch (key) |
fafaa44e | 204 | { |
5a97622d UD |
205 | case 'H': |
206 | header_name = arg; | |
207 | break; | |
208 | case OPT_NEW: | |
209 | force_new = 1; | |
210 | break; | |
211 | case 'o': | |
212 | output_name = arg; | |
213 | break; | |
214 | default: | |
215 | return ARGP_ERR_UNKNOWN; | |
fafaa44e | 216 | } |
5a97622d UD |
217 | return 0; |
218 | } | |
a641835a | 219 | |
5a97622d UD |
220 | |
221 | static char * | |
222 | more_help (int key, const char *text, void *input) | |
223 | { | |
8b748aed | 224 | char *tp = NULL; |
5a97622d UD |
225 | switch (key) |
226 | { | |
227 | case ARGP_KEY_HELP_EXTRA: | |
228 | /* We print some extra information. */ | |
8b748aed | 229 | if (asprintf (&tp, gettext ("\ |
d40eb37a | 230 | For bug reporting instructions, please see:\n\ |
8b748aed JM |
231 | %s.\n"), REPORT_BUGS_TO) < 0) |
232 | return NULL; | |
233 | return tp; | |
5a97622d UD |
234 | default: |
235 | break; | |
236 | } | |
237 | return (char *) text; | |
238 | } | |
239 | ||
240 | /* Print the version information. */ | |
241 | static void | |
242 | print_version (FILE *stream, struct argp_state *state) | |
243 | { | |
8b748aed | 244 | fprintf (stream, "gencat %s%s\n", PKGVERSION, VERSION); |
5a97622d UD |
245 | fprintf (stream, gettext ("\ |
246 | Copyright (C) %s Free Software Foundation, Inc.\n\ | |
247 | This is free software; see the source for copying conditions. There is NO\n\ | |
248 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ | |
5f72f980 | 249 | "), "2020"); |
5a97622d | 250 | fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); |
a641835a RM |
251 | } |
252 | ||
253 | ||
/* Hook for the error functions: its address is stored in
   error_print_progname.  */
static void
error_print (void)
{
  /* Deliberately empty.  The program name must not be printed in
     front of the messages because Emacs' compile.el does not like
     it.  */
}
262 | ||
263 | ||
264 | static struct catalog * | |
265 | read_input_file (struct catalog *current, const char *fname) | |
266 | { | |
267 | FILE *fp; | |
268 | char *buf; | |
269 | size_t len; | |
270 | size_t line_number; | |
d2defdc4 UD |
271 | wchar_t *wbuf; |
272 | size_t wbufsize; | |
273 | iconv_t cd_towc = (iconv_t) -1; | |
274 | iconv_t cd_tomb = (iconv_t) -1; | |
ee25ee65 | 275 | wchar_t escape_char = L'\\'; |
d2defdc4 | 276 | char *codeset = NULL; |
a641835a RM |
277 | |
278 | if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0) | |
279 | { | |
280 | fp = stdin; | |
281 | fname = gettext ("*standard input*"); | |
282 | } | |
283 | else | |
284 | fp = fopen (fname, "r"); | |
285 | if (fp == NULL) | |
286 | { | |
287 | error (0, errno, gettext ("cannot open input file `%s'"), fname); | |
288 | return current; | |
289 | } | |
290 | ||
291 | /* If we haven't seen anything yet, allocate result structure. */ | |
292 | if (current == NULL) | |
293 | { | |
a3b2008a | 294 | current = (struct catalog *) xcalloc (1, sizeof (*current)); |
a641835a | 295 | |
df4ef2ab | 296 | #define obstack_chunk_alloc malloc |
a641835a RM |
297 | #define obstack_chunk_free free |
298 | obstack_init (¤t->mem_pool); | |
a3b2008a UD |
299 | |
300 | current->current_set = find_set (current, NL_SETD); | |
a641835a RM |
301 | } |
302 | ||
303 | buf = NULL; | |
304 | len = 0; | |
305 | line_number = 0; | |
d2defdc4 UD |
306 | |
307 | wbufsize = 1024; | |
308 | wbuf = (wchar_t *) xmalloc (wbufsize); | |
309 | ||
a641835a RM |
310 | while (!feof (fp)) |
311 | { | |
312 | int continued; | |
313 | int used; | |
314 | size_t start_line = line_number + 1; | |
315 | char *this_line; | |
316 | ||
317 | do | |
318 | { | |
319 | int act_len; | |
320 | ||
321 | act_len = getline (&buf, &len, fp); | |
322 | if (act_len <= 0) | |
323 | break; | |
324 | ++line_number; | |
325 | ||
326 | /* It the line continued? */ | |
eabea972 | 327 | continued = 0; |
a641835a RM |
328 | if (buf[act_len - 1] == '\n') |
329 | { | |
330 | --act_len; | |
eabea972 UD |
331 | |
332 | /* There might be more than one backslash at the end of | |
333 | the line. Only if there is an odd number of them is | |
334 | the line continued. */ | |
a5392bed | 335 | if (act_len > 0 && buf[act_len - 1] == '\\') |
eabea972 UD |
336 | { |
337 | int temp_act_len = act_len; | |
338 | ||
339 | do | |
340 | { | |
341 | --temp_act_len; | |
342 | continued = !continued; | |
343 | } | |
344 | while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\'); | |
eabea972 | 345 | |
c70ad7d7 UD |
346 | if (continued) |
347 | --act_len; | |
348 | } | |
a641835a | 349 | } |
a641835a RM |
350 | |
351 | /* Append to currently selected line. */ | |
352 | obstack_grow (¤t->mem_pool, buf, act_len); | |
353 | } | |
354 | while (continued); | |
355 | ||
356 | obstack_1grow (¤t->mem_pool, '\0'); | |
357 | this_line = (char *) obstack_finish (¤t->mem_pool); | |
358 | ||
359 | used = 0; | |
360 | if (this_line[0] == '$') | |
361 | { | |
c3880fbd | 362 | if (isblank (this_line[1])) |
d2defdc4 UD |
363 | { |
364 | int cnt = 1; | |
365 | while (isblank (this_line[cnt])) | |
366 | ++cnt; | |
367 | if (strncmp (&this_line[cnt], "codeset=", 8) != 0) | |
368 | /* This is a comment line. Do nothing. */; | |
369 | else if (codeset != NULL) | |
370 | /* Ignore multiple codeset. */; | |
371 | else | |
372 | { | |
373 | int start = cnt + 8; | |
374 | cnt = start; | |
375 | while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) | |
376 | ++cnt; | |
377 | if (cnt != start) | |
378 | { | |
379 | int len = cnt - start; | |
380 | codeset = xmalloc (len + 1); | |
381 | *((char *) mempcpy (codeset, &this_line[start], len)) | |
382 | = '\0'; | |
383 | } | |
384 | } | |
385 | } | |
a641835a RM |
386 | else if (strncmp (&this_line[1], "set", 3) == 0) |
387 | { | |
40a55d20 | 388 | int cnt = sizeof ("set"); |
6dbe2837 | 389 | int set_number; |
a641835a RM |
390 | const char *symbol = NULL; |
391 | while (isspace (this_line[cnt])) | |
392 | ++cnt; | |
393 | ||
394 | if (isdigit (this_line[cnt])) | |
395 | { | |
396 | set_number = atol (&this_line[cnt]); | |
397 | ||
398 | /* If the given number for the character set is | |
399 | higher than any we used for symbolic set names | |
400 | avoid clashing by using only higher numbers for | |
401 | the following symbolic definitions. */ | |
402 | if (set_number > current->last_set) | |
403 | current->last_set = set_number; | |
404 | } | |
405 | else | |
406 | { | |
407 | /* See whether it is a reasonable identifier. */ | |
408 | int start = cnt; | |
409 | while (isalnum (this_line[cnt]) || this_line[cnt] == '_') | |
410 | ++cnt; | |
411 | ||
412 | if (cnt == start) | |
413 | { | |
414 | /* No correct character found. */ | |
415 | error_at_line (0, 0, fname, start_line, | |
416 | gettext ("illegal set number")); | |
417 | set_number = 0; | |
418 | } | |
419 | else | |
420 | { | |
6d52618b | 421 | /* We have found seomthing that looks like a |
a641835a RM |
422 | correct identifier. */ |
423 | struct set_list *runp; | |
424 | ||
425 | this_line[cnt] = '\0'; | |
426 | used = 1; | |
427 | symbol = &this_line[start]; | |
428 | ||
429 | /* Test whether the identifier was already used. */ | |
430 | runp = current->all_sets; | |
431 | while (runp != 0) | |
432 | if (runp->symbol != NULL | |
433 | && strcmp (runp->symbol, symbol) == 0) | |
434 | break; | |
435 | else | |
436 | runp = runp->next; | |
437 | ||
438 | if (runp != NULL) | |
439 | { | |
440 | /* We cannot allow duplicate identifiers for | |
441 | message sets. */ | |
442 | error_at_line (0, 0, fname, start_line, | |
443 | gettext ("duplicate set definition")); | |
444 | error_at_line (0, 0, runp->fname, runp->line, | |
445 | gettext ("\ | |
446 | this is the first definition")); | |
447 | set_number = 0; | |
448 | } | |
449 | else | |
450 | /* Allocate next free message set for identifier. */ | |
451 | set_number = ++current->last_set; | |
452 | } | |
453 | } | |
454 | ||
455 | if (set_number != 0) | |
456 | { | |
457 | /* We found a legal set number. */ | |
458 | current->current_set = find_set (current, set_number); | |
459 | if (symbol != NULL) | |
460 | used = 1; | |
461 | current->current_set->symbol = symbol; | |
462 | current->current_set->fname = fname; | |
463 | current->current_set->line = start_line; | |
464 | } | |
465 | } | |
466 | else if (strncmp (&this_line[1], "delset", 6) == 0) | |
467 | { | |
468 | int cnt = sizeof ("delset"); | |
a641835a RM |
469 | while (isspace (this_line[cnt])) |
470 | ++cnt; | |
471 | ||
472 | if (isdigit (this_line[cnt])) | |
473 | { | |
474 | size_t set_number = atol (&this_line[cnt]); | |
475 | struct set_list *set; | |
476 | ||
477 | /* Mark the message set with the given number as | |
478 | deleted. */ | |
479 | set = find_set (current, set_number); | |
480 | set->deleted = 1; | |
481 | } | |
482 | else | |
483 | { | |
484 | /* See whether it is a reasonable identifier. */ | |
485 | int start = cnt; | |
486 | while (isalnum (this_line[cnt]) || this_line[cnt] == '_') | |
487 | ++cnt; | |
488 | ||
489 | if (cnt == start) | |
5615eaf2 RM |
490 | error_at_line (0, 0, fname, start_line, |
491 | gettext ("illegal set number")); | |
a641835a RM |
492 | else |
493 | { | |
494 | const char *symbol; | |
495 | struct set_list *runp; | |
496 | ||
497 | this_line[cnt] = '\0'; | |
498 | used = 1; | |
499 | symbol = &this_line[start]; | |
500 | ||
501 | /* We have a symbolic set name. This name must | |
502 | appear somewhere else in the catalogs read so | |
503 | far. */ | |
a641835a RM |
504 | for (runp = current->all_sets; runp != NULL; |
505 | runp = runp->next) | |
506 | { | |
507 | if (strcmp (runp->symbol, symbol) == 0) | |
508 | { | |
509 | runp->deleted = 1; | |
510 | break; | |
511 | } | |
512 | } | |
513 | if (runp == NULL) | |
514 | /* Name does not exist before. */ | |
515 | error_at_line (0, 0, fname, start_line, | |
516 | gettext ("unknown set `%s'"), symbol); | |
517 | } | |
518 | } | |
519 | } | |
520 | else if (strncmp (&this_line[1], "quote", 5) == 0) | |
521 | { | |
d2defdc4 UD |
522 | char buf[2]; |
523 | char *bufptr; | |
524 | size_t buflen; | |
525 | char *wbufptr; | |
526 | size_t wbuflen; | |
527 | int cnt; | |
528 | ||
529 | cnt = sizeof ("quote"); | |
a641835a RM |
530 | while (isspace (this_line[cnt])) |
531 | ++cnt; | |
d2defdc4 UD |
532 | |
533 | /* We need the conversion. */ | |
534 | if (cd_towc == (iconv_t) -1 | |
ee25ee65 UD |
535 | && open_conversion (codeset, &cd_towc, &cd_tomb, |
536 | &escape_char) != 0) | |
d2defdc4 UD |
537 | /* Something is wrong. */ |
538 | goto out; | |
539 | ||
a641835a | 540 | /* Yes, the quote char can be '\0'; this means no quote |
d2defdc4 UD |
541 | char. The function using the information works on |
542 | wide characters so we have to convert it here. */ | |
543 | buf[0] = this_line[cnt]; | |
544 | buf[1] = '\0'; | |
545 | bufptr = buf; | |
546 | buflen = 2; | |
547 | ||
548 | wbufptr = (char *) wbuf; | |
549 | wbuflen = wbufsize; | |
550 | ||
551 | /* Flush the state. */ | |
552 | iconv (cd_towc, NULL, NULL, NULL, NULL); | |
553 | ||
554 | iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen); | |
555 | if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2]) | |
556 | error_at_line (0, 0, fname, start_line, | |
557 | gettext ("invalid quote character")); | |
558 | else | |
559 | /* Use the converted wide character. */ | |
560 | current->quote_char = wbuf[0]; | |
a641835a RM |
561 | } |
562 | else | |
563 | { | |
564 | int cnt; | |
565 | cnt = 2; | |
566 | while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) | |
567 | ++cnt; | |
568 | this_line[cnt] = '\0'; | |
569 | error_at_line (0, 0, fname, start_line, | |
570 | gettext ("unknown directive `%s': line ignored"), | |
571 | &this_line[1]); | |
572 | } | |
573 | } | |
574 | else if (isalnum (this_line[0]) || this_line[0] == '_') | |
575 | { | |
576 | const char *ident = this_line; | |
0a70515e | 577 | char *line = this_line; |
a641835a RM |
578 | int message_number; |
579 | ||
580 | do | |
0a70515e UD |
581 | ++line; |
582 | while (line[0] != '\0' && !isspace (line[0])); | |
583 | if (line[0] != '\0') | |
584 | *line++ = '\0'; /* Terminate the identifier. */ | |
a641835a | 585 | |
a641835a RM |
586 | /* Now we found the beginning of the message itself. */ |
587 | ||
588 | if (isdigit (ident[0])) | |
589 | { | |
590 | struct message_list *runp; | |
96eaef36 | 591 | struct message_list *lastp; |
a641835a RM |
592 | |
593 | message_number = atoi (ident); | |
594 | ||
595 | /* Find location to insert the new message. */ | |
596 | runp = current->current_set->messages; | |
96eaef36 | 597 | lastp = NULL; |
a641835a RM |
598 | while (runp != NULL) |
599 | if (runp->number == message_number) | |
600 | break; | |
601 | else | |
96eaef36 UD |
602 | { |
603 | lastp = runp; | |
604 | runp = runp->next; | |
605 | } | |
a641835a RM |
606 | if (runp != NULL) |
607 | { | |
b6aa34eb UD |
608 | /* Oh, oh. There is already a message with this |
609 | number in the message set. */ | |
819c56e7 UD |
610 | if (runp->symbol == NULL) |
611 | { | |
612 | /* The existing message had its number specified | |
613 | by the user. Fatal collision type uh, oh. */ | |
614 | error_at_line (0, 0, fname, start_line, | |
615 | gettext ("duplicated message number")); | |
616 | error_at_line (0, 0, runp->fname, runp->line, | |
617 | gettext ("this is the first definition")); | |
618 | message_number = 0; | |
619 | } | |
620 | else | |
621 | { | |
622 | /* Collision was with number auto-assigned to a | |
623 | symbolic. Change existing symbolic number | |
624 | and move to end the list (if not already there). */ | |
625 | runp->number = ++current->current_set->last_message; | |
626 | ||
627 | if (runp->next != NULL) | |
628 | { | |
629 | struct message_list *endp; | |
630 | ||
631 | if (lastp == NULL) | |
632 | current->current_set->messages=runp->next; | |
633 | else | |
634 | lastp->next=runp->next; | |
635 | ||
636 | endp = runp->next; | |
637 | while (endp->next != NULL) | |
638 | endp = endp->next; | |
639 | ||
640 | endp->next = runp; | |
641 | runp->next = NULL; | |
642 | } | |
643 | } | |
a641835a RM |
644 | } |
645 | ident = NULL; /* We don't have a symbol. */ | |
646 | ||
647 | if (message_number != 0 | |
648 | && message_number > current->current_set->last_message) | |
649 | current->current_set->last_message = message_number; | |
650 | } | |
651 | else if (ident[0] != '\0') | |
652 | { | |
653 | struct message_list *runp; | |
a641835a RM |
654 | |
655 | /* Test whether the symbolic name was not used for | |
656 | another message in this message set. */ | |
96eaef36 | 657 | runp = current->current_set->messages; |
a641835a RM |
658 | while (runp != NULL) |
659 | if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0) | |
660 | break; | |
661 | else | |
662 | runp = runp->next; | |
663 | if (runp != NULL) | |
664 | { | |
b6aa34eb UD |
665 | /* The name is already used. */ |
666 | error_at_line (0, 0, fname, start_line, gettext ("\ | |
96eaef36 | 667 | duplicated message identifier")); |
b6aa34eb UD |
668 | error_at_line (0, 0, runp->fname, runp->line, |
669 | gettext ("this is the first definition")); | |
a641835a RM |
670 | message_number = 0; |
671 | } | |
672 | else | |
673 | /* Give the message the next unused number. */ | |
674 | message_number = ++current->current_set->last_message; | |
675 | } | |
676 | else | |
677 | message_number = 0; | |
678 | ||
679 | if (message_number != 0) | |
680 | { | |
d2defdc4 UD |
681 | char *inbuf; |
682 | size_t inlen; | |
683 | char *outbuf; | |
684 | size_t outlen; | |
a641835a | 685 | struct message_list *newp; |
0a70515e | 686 | size_t line_len = strlen (line) + 1; |
9d37acc4 | 687 | size_t ident_len = 0; |
d2defdc4 UD |
688 | |
689 | /* We need the conversion. */ | |
690 | if (cd_towc == (iconv_t) -1 | |
ee25ee65 UD |
691 | && open_conversion (codeset, &cd_towc, &cd_tomb, |
692 | &escape_char) != 0) | |
d2defdc4 UD |
693 | /* Something is wrong. */ |
694 | goto out; | |
695 | ||
696 | /* Convert to a wide character string. We have to | |
697 | interpret escape sequences which will be impossible | |
698 | without doing the conversion if the codeset of the | |
699 | message is stateful. */ | |
700 | while (1) | |
701 | { | |
0a70515e UD |
702 | inbuf = line; |
703 | inlen = line_len; | |
d2defdc4 UD |
704 | outbuf = (char *) wbuf; |
705 | outlen = wbufsize; | |
706 | ||
707 | /* Flush the state. */ | |
708 | iconv (cd_towc, NULL, NULL, NULL, NULL); | |
709 | ||
710 | iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen); | |
711 | if (inlen == 0) | |
712 | { | |
713 | /* The string is converted. */ | |
714 | assert (outlen < wbufsize); | |
715 | assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1] | |
716 | == L'\0'); | |
717 | break; | |
718 | } | |
719 | ||
720 | if (outlen != 0) | |
721 | { | |
722 | /* Something is wrong with this string, we ignore it. */ | |
723 | error_at_line (0, 0, fname, start_line, gettext ("\ | |
724 | invalid character: message ignored")); | |
725 | goto ignore; | |
726 | } | |
727 | ||
728 | /* The output buffer is too small. */ | |
729 | wbufsize *= 2; | |
730 | wbuf = (wchar_t *) xrealloc (wbuf, wbufsize); | |
731 | } | |
a641835a | 732 | |
a641835a RM |
733 | /* Strip quote characters, change escape sequences into |
734 | correct characters etc. */ | |
d2defdc4 | 735 | normalize_line (fname, start_line, cd_towc, wbuf, |
ee25ee65 | 736 | current->quote_char, escape_char); |
a641835a | 737 | |
9d37acc4 UD |
738 | if (ident) |
739 | ident_len = line - this_line; | |
740 | ||
d2defdc4 UD |
741 | /* Now the string is free of escape sequences. Convert it |
742 | back into a multibyte character string. First free the | |
743 | memory allocated for the original string. */ | |
744 | obstack_free (¤t->mem_pool, this_line); | |
745 | ||
0a70515e UD |
746 | used = 1; /* Yes, we use the line. */ |
747 | ||
d2defdc4 UD |
748 | /* Now fill in the new string. It should never happen that |
749 | the replaced string is longer than the original. */ | |
750 | inbuf = (char *) wbuf; | |
751 | inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t); | |
752 | ||
753 | outlen = obstack_room (¤t->mem_pool); | |
0a70515e UD |
754 | obstack_blank (¤t->mem_pool, outlen); |
755 | this_line = (char *) obstack_base (¤t->mem_pool); | |
9d37acc4 UD |
756 | outbuf = this_line + ident_len; |
757 | outlen -= ident_len; | |
d2defdc4 UD |
758 | |
759 | /* Flush the state. */ | |
760 | iconv (cd_tomb, NULL, NULL, NULL, NULL); | |
761 | ||
762 | iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen); | |
763 | if (inlen != 0) | |
764 | { | |
765 | error_at_line (0, 0, fname, start_line, | |
766 | gettext ("invalid line")); | |
767 | goto ignore; | |
768 | } | |
769 | assert (outbuf[-1] == '\0'); | |
770 | ||
771 | /* Free the memory in the obstack we don't use. */ | |
0a70515e UD |
772 | obstack_blank (¤t->mem_pool, -(int) outlen); |
773 | line = obstack_finish (¤t->mem_pool); | |
d2defdc4 | 774 | |
a641835a RM |
775 | newp = (struct message_list *) xmalloc (sizeof (*newp)); |
776 | newp->number = message_number; | |
9d37acc4 | 777 | newp->message = line + ident_len; |
a641835a | 778 | /* Remember symbolic name; is NULL if no is given. */ |
9d37acc4 | 779 | newp->symbol = ident ? line : NULL; |
a641835a RM |
780 | /* Remember where we found the character. */ |
781 | newp->fname = fname; | |
782 | newp->line = start_line; | |
783 | ||
784 | /* Find place to insert to message. We keep them in a | |
785 | sorted single linked list. */ | |
786 | if (current->current_set->messages == NULL | |
787 | || current->current_set->messages->number > message_number) | |
788 | { | |
789 | newp->next = current->current_set->messages; | |
790 | current->current_set->messages = newp; | |
791 | } | |
792 | else | |
793 | { | |
794 | struct message_list *runp; | |
795 | runp = current->current_set->messages; | |
796 | while (runp->next != NULL) | |
797 | if (runp->next->number > message_number) | |
798 | break; | |
799 | else | |
800 | runp = runp->next; | |
801 | newp->next = runp->next; | |
802 | runp->next = newp; | |
803 | } | |
804 | } | |
805 | ++current->total_messages; | |
806 | } | |
807 | else | |
808 | { | |
809 | size_t cnt; | |
810 | ||
811 | cnt = 0; | |
812 | /* See whether we have any non-white space character in this | |
813 | line. */ | |
814 | while (this_line[cnt] != '\0' && isspace (this_line[cnt])) | |
815 | ++cnt; | |
816 | ||
817 | if (this_line[cnt] != '\0') | |
818 | /* Yes, some unknown characters found. */ | |
819 | error_at_line (0, 0, fname, start_line, | |
820 | gettext ("malformed line ignored")); | |
821 | } | |
822 | ||
d2defdc4 | 823 | ignore: |
a641835a RM |
824 | /* We can save the memory for the line if it was not used. */ |
825 | if (!used) | |
826 | obstack_free (¤t->mem_pool, this_line); | |
827 | } | |
828 | ||
d2defdc4 UD |
829 | /* Close the conversion modules. */ |
830 | iconv_close (cd_towc); | |
831 | iconv_close (cd_tomb); | |
832 | free (codeset); | |
833 | ||
834 | out: | |
835 | free (wbuf); | |
836 | ||
a641835a RM |
837 | if (fp != stdin) |
838 | fclose (fp); | |
839 | return current; | |
840 | } | |
841 | ||
842 | ||
843 | static void | |
844 | write_out (struct catalog *catalog, const char *output_name, | |
845 | const char *header_name) | |
846 | { | |
847 | /* Computing the "optimal" size. */ | |
848 | struct set_list *set_run; | |
849 | size_t best_total, best_size, best_depth; | |
850 | size_t act_size, act_depth; | |
851 | struct catalog_obj obj; | |
852 | struct obstack string_pool; | |
853 | const char *strings; | |
854 | size_t strings_size; | |
b6aa34eb | 855 | uint32_t *array1, *array2; |
a641835a RM |
856 | size_t cnt; |
857 | int fd; | |
858 | ||
859 | /* If not otherwise told try to read file with existing | |
860 | translations. */ | |
861 | if (!force_new) | |
862 | read_old (catalog, output_name); | |
863 | ||
864 | /* Initialize best_size with a very high value. */ | |
865 | best_total = best_size = best_depth = UINT_MAX; | |
866 | ||
867 | /* We need some start size for testing. Let's start with | |
868 | TOTAL_MESSAGES / 5, which theoretically provides a mean depth of | |
869 | 5. */ | |
870 | act_size = 1 + catalog->total_messages / 5; | |
871 | ||
872 | /* We determine the size of a hash table here. Because the message | |
873 | numbers can be chosen arbitrary by the programmer we cannot use | |
874 | the simple method of accessing the array using the message | |
875 | number. The algorithm is based on the trivial hash function | |
876 | NUMBER % TABLE_SIZE, where collisions are stored in a second | |
877 | dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that | |
878 | the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */ | |
879 | while (act_size <= best_total) | |
880 | { | |
881 | size_t deep[act_size]; | |
882 | ||
883 | act_depth = 1; | |
884 | memset (deep, '\0', act_size * sizeof (size_t)); | |
885 | set_run = catalog->all_sets; | |
886 | while (set_run != NULL) | |
887 | { | |
888 | struct message_list *message_run; | |
889 | ||
890 | message_run = set_run->messages; | |
891 | while (message_run != NULL) | |
892 | { | |
893 | size_t idx = (message_run->number * set_run->number) % act_size; | |
894 | ||
895 | ++deep[idx]; | |
896 | if (deep[idx] > act_depth) | |
897 | { | |
898 | act_depth = deep[idx]; | |
899 | if (act_depth * act_size > best_total) | |
900 | break; | |
901 | } | |
902 | message_run = message_run->next; | |
903 | } | |
a641835a RM |
904 | set_run = set_run->next; |
905 | } | |
906 | ||
adc6ff7f RM |
907 | if (act_depth * act_size <= best_total) |
908 | { | |
909 | /* We have found a better solution. */ | |
910 | best_total = act_depth * act_size; | |
911 | best_size = act_size; | |
912 | best_depth = act_depth; | |
913 | } | |
914 | ||
a641835a RM |
915 | ++act_size; |
916 | } | |
917 | ||
918 | /* let's be prepared for an empty message file. */ | |
919 | if (best_size == UINT_MAX) | |
920 | { | |
921 | best_size = 1; | |
922 | best_depth = 1; | |
923 | } | |
924 | ||
925 | /* OK, now we have the size we will use. Fill in the header, build | |
926 | the table and the second one with swapped byte order. */ | |
927 | obj.magic = CATGETS_MAGIC; | |
928 | obj.plane_size = best_size; | |
929 | obj.plane_depth = best_depth; | |
930 | ||
931 | /* Allocate room for all needed arrays. */ | |
932 | array1 = | |
b6aa34eb UD |
933 | (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3); |
934 | memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3); | |
a641835a | 935 | array2 |
b6aa34eb | 936 | = (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3); |
a641835a RM |
937 | obstack_init (&string_pool); |
938 | ||
939 | set_run = catalog->all_sets; | |
940 | while (set_run != NULL) | |
941 | { | |
942 | struct message_list *message_run; | |
943 | ||
944 | message_run = set_run->messages; | |
945 | while (message_run != NULL) | |
946 | { | |
947 | size_t idx = (((message_run->number * set_run->number) % best_size) | |
948 | * 3); | |
949 | /* Determine collision depth. */ | |
950 | while (array1[idx] != 0) | |
951 | idx += best_size * 3; | |
952 | ||
953 | /* Store set number, message number and pointer into string | |
954 | space, relative to the first string. */ | |
955 | array1[idx + 0] = set_run->number; | |
956 | array1[idx + 1] = message_run->number; | |
957 | array1[idx + 2] = obstack_object_size (&string_pool); | |
958 | ||
959 | /* Add current string to the continuous space containing all | |
960 | strings. */ | |
961 | obstack_grow0 (&string_pool, message_run->message, | |
962 | strlen (message_run->message)); | |
963 | ||
964 | message_run = message_run->next; | |
965 | } | |
966 | ||
967 | set_run = set_run->next; | |
968 | } | |
969 | strings_size = obstack_object_size (&string_pool); | |
970 | strings = obstack_finish (&string_pool); | |
971 | ||
972 | /* Compute ARRAY2 by changing the byte order. */ | |
973 | for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt) | |
974 | array2[cnt] = SWAPU32 (array1[cnt]); | |
975 | ||
976 | /* Now we can write out the whole data. */ | |
977 | if (strcmp (output_name, "-") == 0 | |
978 | || strcmp (output_name, "/dev/stdout") == 0) | |
979 | fd = STDOUT_FILENO; | |
980 | else | |
981 | { | |
982 | fd = creat (output_name, 0666); | |
983 | if (fd < 0) | |
984 | error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"), | |
985 | output_name); | |
986 | } | |
987 | ||
988 | /* Write out header. */ | |
989 | write (fd, &obj, sizeof (obj)); | |
990 | ||
991 | /* We always write out the little endian version of the index | |
992 | arrays. */ | |
993 | #if __BYTE_ORDER == __LITTLE_ENDIAN | |
b6aa34eb UD |
994 | write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3); |
995 | write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3); | |
a641835a | 996 | #elif __BYTE_ORDER == __BIG_ENDIAN |
b6aa34eb UD |
997 | write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3); |
998 | write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3); | |
a641835a RM |
999 | #else |
1000 | # error Cannot handle __BYTE_ORDER byte order | |
1001 | #endif | |
1002 | ||
1003 | /* Finally write the strings. */ | |
1004 | write (fd, strings, strings_size); | |
1005 | ||
1006 | if (fd != STDOUT_FILENO) | |
1007 | close (fd); | |
1008 | ||
1009 | /* If requested now write out the header file. */ | |
1010 | if (header_name != NULL) | |
1011 | { | |
1012 | int first = 1; | |
1013 | FILE *fp; | |
1014 | ||
1015 | /* Open output file. "-" or "/dev/stdout" means write to | |
1016 | standard output. */ | |
1017 | if (strcmp (header_name, "-") == 0 | |
1018 | || strcmp (header_name, "/dev/stdout") == 0) | |
1019 | fp = stdout; | |
1020 | else | |
1021 | { | |
1022 | fp = fopen (header_name, "w"); | |
1023 | if (fp == NULL) | |
1024 | error (EXIT_FAILURE, errno, | |
1025 | gettext ("cannot open output file `%s'"), header_name); | |
1026 | } | |
1027 | ||
1028 | /* Iterate over all sets and all messages. */ | |
1029 | set_run = catalog->all_sets; | |
1030 | while (set_run != NULL) | |
1031 | { | |
1032 | struct message_list *message_run; | |
1033 | ||
1034 | /* If the current message set has a symbolic name write this | |
1035 | out first. */ | |
1036 | if (set_run->symbol != NULL) | |
a4242e25 | 1037 | fprintf (fp, "%s#define %sSet %#x\t/* %s:%Zu */\n", |
a641835a RM |
1038 | first ? "" : "\n", set_run->symbol, set_run->number - 1, |
1039 | set_run->fname, set_run->line); | |
1040 | first = 0; | |
1041 | ||
1042 | message_run = set_run->messages; | |
1043 | while (message_run != NULL) | |
1044 | { | |
1045 | /* If the current message has a symbolic name write | |
1046 | #define out. But we have to take care for the set | |
1047 | not having a symbolic name. */ | |
1048 | if (message_run->symbol != NULL) | |
6e4c40ba UD |
1049 | { |
1050 | if (set_run->symbol == NULL) | |
1051 | fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n", | |
1052 | set_run->number, message_run->symbol, | |
1053 | message_run->number, message_run->fname, | |
1054 | message_run->line); | |
1055 | else | |
1056 | fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n", | |
1057 | set_run->symbol, message_run->symbol, | |
1058 | message_run->number, message_run->fname, | |
1059 | message_run->line); | |
1060 | } | |
a641835a RM |
1061 | |
1062 | message_run = message_run->next; | |
1063 | } | |
1064 | ||
1065 | set_run = set_run->next; | |
1066 | } | |
1067 | ||
1068 | if (fp != stdout) | |
1069 | fclose (fp); | |
1070 | } | |
1071 | } | |
1072 | ||
1073 | ||
1074 | static struct set_list * | |
1075 | find_set (struct catalog *current, int number) | |
1076 | { | |
1077 | struct set_list *result = current->all_sets; | |
1078 | ||
1079 | /* We must avoid set number 0 because a set of this number signals | |
1080 | in the tables that the entry is not occupied. */ | |
1081 | ++number; | |
1082 | ||
1083 | while (result != NULL) | |
1084 | if (result->number == number) | |
1085 | return result; | |
1086 | else | |
1087 | result = result->next; | |
1088 | ||
1089 | /* Prepare new message set. */ | |
a3b2008a | 1090 | result = (struct set_list *) xcalloc (1, sizeof (*result)); |
a641835a | 1091 | result->number = number; |
a641835a RM |
1092 | result->next = current->all_sets; |
1093 | current->all_sets = result; | |
1094 | ||
1095 | return result; | |
1096 | } | |
1097 | ||
1098 | ||
/* Normalize STRING *in*place* by processing escape sequences and
   quote characters.

   FNAME and LINE are only used to label diagnostics.  CD is the
   conversion descriptor used to turn the byte produced by an octal
   escape into a wide character.  QUOTE_CHAR is the quote character
   (L'\0' if there is none) and ESCAPE_CHAR the character that
   introduces an escape sequence.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  const int have_quote = quote_char != L'\0';
  const int is_quoted = have_quote && *string == quote_char;
  wchar_t *src = string + (is_quoted ? 1 : 0);
  wchar_t *dst = string;

  /* The message ends at the end of the input or at the first quote
     character which is not escaped.  */
  while (*src != L'\0' && *src != quote_char)
    {
      wchar_t replacement;

      if (*src != escape_char)
        {
          /* Ordinary character, copy it unchanged.  */
          *dst++ = *src++;
          continue;
        }

      /* Step over the escape character and look at what follows.  */
      ++src;

      if (have_quote && *src == quote_char)
        {
          /* An escaped quote character.  This is an extension to XPG.  */
          *dst++ = *src++;
          continue;
        }

      if (*src >= L'0' && *src <= L'7')
        {
          /* Octal escape: collect the byte value, then convert that
             byte (plus a terminating NUL) to a wide character.  */
          char mb[2];
          wchar_t wc[2];
          char *in = mb;
          char *out = (char *) wc;
          size_t inleft = 2;
          size_t outleft = sizeof (wc);
          int value = *src++ - L'0';

          while (value <= (255 / 8) && *src >= L'0' && *src <= L'7')
            {
              value *= 8;
              value += *src++ - L'0';
            }

          mb[0] = (char) value;
          mb[1] = '\0';

          /* Flush the state.  */
          iconv (cd, NULL, NULL, NULL, NULL);

          iconv (cd, &in, &inleft, &out, &outleft);
          if (in == &mb[2] && (wchar_t *) out == &wc[2])
            *dst++ = wc[0];
          else
            error_at_line (0, 0, fname, line,
                           gettext ("invalid escape sequence"));
          continue;
        }

      /* Single letter escapes.  L'\0' means "no such escape".  */
      switch (*src)
        {
        case L'n':
          replacement = L'\n';
          break;
        case L't':
          replacement = L'\t';
          break;
        case L'v':
          replacement = L'\v';
          break;
        case L'b':
          replacement = L'\b';
          break;
        case L'r':
          replacement = L'\r';
          break;
        case L'f':
          replacement = L'\f';
          break;
        default:
          replacement = L'\0';
          break;
        }

      if (replacement != L'\0')
        {
          *dst++ = replacement;
          ++src;
        }
      else if (*src == escape_char)
        {
          /* A doubled escape character stands for itself.  */
          *dst++ = escape_char;
          ++src;
        }
      /* Otherwise the escape character is simply dropped; the
         character after it is handled by the next iteration.  */
    }

  /* A message which started with a quote character has to end with
     one, too.  */
  if (is_quoted && *src != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *dst = L'\0';
}
1217 | ||
1218 | ||
1219 | static void | |
1220 | read_old (struct catalog *catalog, const char *file_name) | |
1221 | { | |
1222 | struct catalog_info old_cat_obj; | |
1223 | struct set_list *set = NULL; | |
1224 | int last_set = -1; | |
1225 | size_t cnt; | |
1226 | ||
a641835a | 1227 | /* Try to open catalog, but don't look through the NLSPATH. */ |
ca130fe4 | 1228 | if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0) |
6e4c40ba UD |
1229 | { |
1230 | if (errno == ENOENT) | |
1231 | /* No problem, the catalog simply does not exist. */ | |
1232 | return; | |
1233 | else | |
ca130fe4 UD |
1234 | error (EXIT_FAILURE, errno, |
1235 | gettext ("while opening old catalog file")); | |
6e4c40ba | 1236 | } |
a641835a RM |
1237 | |
1238 | /* OK, we have the catalog loaded. Now read all messages and merge | |
1239 | them. When set and message number clash for any message the new | |
b6aa34eb UD |
1240 | one is used. If the new one is empty it indicates that the |
1241 | message should be deleted. */ | |
a641835a RM |
1242 | for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt) |
1243 | { | |
1244 | struct message_list *message, *last; | |
1245 | ||
1246 | if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0) | |
1247 | /* No message in this slot. */ | |
1248 | continue; | |
1249 | ||
b6aa34eb | 1250 | if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set) |
a641835a RM |
1251 | { |
1252 | last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1; | |
1253 | set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1); | |
1254 | } | |
1255 | ||
1256 | last = NULL; | |
1257 | message = set->messages; | |
1258 | while (message != NULL) | |
1259 | { | |
b6aa34eb | 1260 | if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1]) |
a641835a RM |
1261 | break; |
1262 | last = message; | |
1263 | message = message->next; | |
1264 | } | |
1265 | ||
1266 | if (message == NULL | |
b6aa34eb | 1267 | || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1]) |
a641835a RM |
1268 | { |
1269 | /* We have found a message which is not yet in the catalog. | |
1270 | Insert it at the right position. */ | |
1271 | struct message_list *newp; | |
1272 | ||
c4f50205 | 1273 | newp = (struct message_list *) xmalloc (sizeof (*newp)); |
a641835a RM |
1274 | newp->number = old_cat_obj.name_ptr[cnt * 3 + 1]; |
1275 | newp->message = | |
1276 | &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]]; | |
1277 | newp->fname = NULL; | |
1278 | newp->line = 0; | |
1279 | newp->symbol = NULL; | |
1280 | newp->next = message; | |
1281 | ||
1282 | if (last == NULL) | |
1283 | set->messages = newp; | |
1284 | else | |
1285 | last->next = newp; | |
1286 | ||
1287 | ++catalog->total_messages; | |
1288 | } | |
b6aa34eb UD |
1289 | else if (*message->message == '\0') |
1290 | { | |
1291 | /* The new empty message has overridden the old one thus | |
1292 | "deleting" it as required. Now remove the empty remains. */ | |
1293 | if (last == NULL) | |
1294 | set->messages = message->next; | |
1295 | else | |
1296 | last->next = message->next; | |
1297 | } | |
a641835a RM |
1298 | } |
1299 | } | |
d2defdc4 UD |
1300 | |
1301 | ||
/* Open the conversion descriptors needed to process a message file
   and determine the escape character of its codeset.

   CODESET names the charset of the input.  If it is NULL the codeset
   of the locale selected by the environment is used; the locale is
   switched back to "C" afterwards.

   On success 0 is returned, *CD_TOWCP converts from CODESET to
   wchar_t, *CD_TOMBP converts from wchar_t to CODESET, and
   *ESCAPE_CHARP is the wide character the byte 0x5c is mapped to
   (L'\\' if this cannot be determined).

   If one of the descriptors cannot be opened a diagnostic is printed
   and 1 is returned.  In this case every descriptor which was opened
   is closed again and both *CD_TOWCP and *CD_TOMBP are set to
   (iconv_t) -1, so the caller neither leaks a descriptor nor sees a
   handle which has already been closed.  *ESCAPE_CHARP is not
   modified.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.  */
  if (codeset == NULL)
    {
      setlocale (LC_ALL, "");
      codeset = nl_langinfo (CODESET);
      /* NOTE(review): POSIX permits setlocale to invalidate the string
         returned by nl_langinfo; this relies on the C library keeping
         it valid -- confirm if this code is ever used elsewhere.  */
      setlocale (LC_ALL, "C");
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");
  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever direction could be opened and leave no
         stale handle behind.  */
      if (*cd_towcp != (iconv_t) -1)
        {
          iconv_close (*cd_towcp);
          *cd_towcp = (iconv_t) -1;
        }
      if (*cd_tombp != (iconv_t) -1)
        {
          iconv_close (*cd_tombp);
          *cd_tombp = (iconv_t) -1;
        }

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  /* Convert the byte together with its terminating NUL.  Success is
     detected by both buffers having been used up completely.  */
  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}