From 4b10dd6c1959577f57850ca427a94fe22b9f3299 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 31 Aug 1999 07:04:41 +0000 Subject: Update. * locale/Makefile (distribute): Add iso-639.def and iso-3166.def. Change charset.h to charmap.h. (categories): Add new categories. Leave out collate for now. Update build rules. * locale/categories.def: Add definitions for new categories. * locale/langinfo.h: Likewise. * locale/locale.h: Likewise. * locale/C-address.c: New file. * locale/C-identification.c: New file. * locale/C-measurement.c: New file. * locale/C-name.c: New file. * locale/C-paper.c: New file. * locale/C-telephone.c: New file. * locale/lc-address.c: Likewise. * locale/lc-identification.c: Likewise. * locale/lc-measurement.c: Likewise. * locale/lc-name.c: Likewise. * locale/lc-paper.c: Likewise. * locale/lc-telephone.c: Likewise. * locale/C-ctype.c: Update for locale rewrite. * locale/C-messages.c: Likewise. * locale/C-monetary.c: Likewise. * locale/C-time.c: Likewise. * locale/lc-collate.c: Likewise. * locale/lc-ctype.c: Likewise. * locale/lc-monetary.c: Likewise. * locale/lc-time.c: Likewise. * locale/localeinfo.h: Likewise. * locale/newlocale.c: Likewise. * locale/setlocale.c: Likewise. * locale/weight.h: Likewise. * locale/findlocale.c: Unconditionally use mmap. Handle new categories. * locale/loadlocale.c: Likewise. * locale/iso-3166.def: New file. * locale/iso-639.def: New file. * locale/programs/charmap-kw.gperf: Add new keywords. * locale/programs/locfile-kw.gperf: Likewise. * locale/programs/locfile-token.h: Define new tokens. * locale/programs/charmap.c: Rewrite to handle multibyte charsets. * locale/programs/charmap.h: New file. * locale/programs/charset.h: Removed. * locale/programs/config.h: Add __LC_LAST. * locale/programs/lc-address.c: New file. * locale/programs/lc-identification.c: New file. * locale/programs/lc-measurement.c: New file. * locale/programs/lc-name.c: New file. * locale/programs/lc-paper.c: New file. 
* locale/programs/lc-telephone.c: New file. * locale/programs/lc-collate.c: Update for locale rewrite. * locale/programs/lc-ctype.c: Likewise. * locale/programs/lc-messages.c: Likewise. * locale/programs/lc-monetary.c: Likewise. * locale/programs/lc-numeric.c: Likewise. * locale/programs/lc-time.c: Likewise. * locale/programs/locale.c: Likewise. * locale/programs/localedef.c: Likewise. * locale/programs/locfile.c: Likewise. * locale/programs/repertoire.c: Likewise. * locale/programs/repertoire.h: Likewise. * locale/programs/locfile.c: Update prototypes. Update handle_copy definition. * locale/programs/linereader.c: Add handling of wide char strings and new definition file syntax. * locale/programs/linereader.h (struct token): Add elements for wide character strings. * locale/programs/locale-spec.c: Disable handling of collation elements for now. * locale/programs/simple-hash.h: Cleanup. * locale/programs/stringtrans.h: Handle quoting of end of line. * string/strcoll.c: Fall back on strcmp for now. * string/strxfrm.c: Fall back on strncpy/strlen for now. * time/strftime.c: Use new wide character data for wcsftime. * time/strptime.c: Remove _nl_C_LC_TIME declaration. * wctype/cname-lookup.h: Update for new LC_CTYPE data. 
--- locale/programs/charmap-kw.gperf | 7 +- locale/programs/charmap-kw.h | 120 +- locale/programs/charmap.c | 284 +++- locale/programs/charmap.h | 78 + locale/programs/charset.h | 74 - locale/programs/config.h | 30 +- locale/programs/ld-address.c | 514 ++++++ locale/programs/ld-collate.c | 1819 +++++++++++++++++---- locale/programs/ld-ctype.c | 3044 +++++++++++++++++++++++++++-------- locale/programs/ld-identification.c | 376 +++++ locale/programs/ld-measurement.c | 206 +++ locale/programs/ld-messages.c | 252 +-- locale/programs/ld-monetary.c | 757 +++++++-- locale/programs/ld-name.c | 276 ++++ locale/programs/ld-numeric.c | 293 ++-- locale/programs/ld-paper.c | 235 +++ locale/programs/ld-telephone.c | 283 ++++ locale/programs/ld-time.c | 1155 ++++++++++--- locale/programs/linereader.c | 452 ++++-- locale/programs/linereader.h | 34 +- locale/programs/locale-spec.c | 2 +- locale/programs/locale.c | 8 +- locale/programs/localedef.c | 299 ++-- locale/programs/localedef.h | 131 ++ locale/programs/locfile-kw.gperf | 250 ++- locale/programs/locfile-kw.h | 482 ++++-- locale/programs/locfile-token.h | 111 +- locale/programs/locfile.c | 1088 ++----------- locale/programs/locfile.h | 264 ++- locale/programs/repertoire.c | 225 ++- locale/programs/repertoire.h | 31 +- locale/programs/simple-hash.h | 30 +- locale/programs/stringtrans.c | 10 +- 33 files changed, 9932 insertions(+), 3288 deletions(-) create mode 100644 locale/programs/charmap.h delete mode 100644 locale/programs/charset.h create mode 100644 locale/programs/ld-address.c create mode 100644 locale/programs/ld-identification.c create mode 100644 locale/programs/ld-measurement.c create mode 100644 locale/programs/ld-name.c create mode 100644 locale/programs/ld-paper.c create mode 100644 locale/programs/ld-telephone.c create mode 100644 locale/programs/localedef.h (limited to 'locale/programs') diff --git a/locale/programs/charmap-kw.gperf b/locale/programs/charmap-kw.gperf index 1fb9c38b04..34241041a6 100644 --- 
a/locale/programs/charmap-kw.gperf +++ b/locale/programs/charmap-kw.gperf @@ -1,7 +1,7 @@ %{ -/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper, . + Contributed by Ulrich Drepper, . The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -33,6 +33,9 @@ g0esc, tok_g0esc, 1 g1esc, tok_g1esc, 1 g2esc, tok_g2esc, 1 g3esc, tok_g3esc, 1 +escseq, tok_escseq, 1 +addset, tok_addset, 1 +include, tok_include, 1 CHARMAP, tok_charmap, 0 END, tok_end, 0 WIDTH, tok_width, 0 diff --git a/locale/programs/charmap-kw.h b/locale/programs/charmap-kw.h index 3bfcd14612..4b402165ef 100644 --- a/locale/programs/charmap-kw.h +++ b/locale/programs/charmap-kw.h @@ -1,8 +1,8 @@ -/* C code produced by gperf version 2.5 (GNU C++ version) */ -/* Command-line: gperf -acCgopt -k1,2,5,$ -N charmap_hash programs/charmap-kw.gperf */ -/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +/* ANSI-C code produced by gperf version 2.7.1 (19981006 egcs) */ +/* Command-line: gperf -acCgopt -k1,2,5,9,$ -L ANSI-C -N charmap_hash programs/charmap-kw.gperf */ +/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper, . + Contributed by Ulrich Drepper, . 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -24,81 +24,103 @@ #include "locfile-token.h" struct keyword_t ; -#define TOTAL_KEYWORDS 14 +#define TOTAL_KEYWORDS 17 #define MIN_WORD_LENGTH 3 #define MAX_WORD_LENGTH 14 #define MIN_HASH_VALUE 3 -#define MAX_HASH_VALUE 25 -/* maximum key range = 23, duplicates = 0 */ +#define MAX_HASH_VALUE 35 +/* maximum key range = 33, duplicates = 0 */ #ifdef __GNUC__ -inline +__inline #endif static unsigned int -hash (register const char *str, register int len) +hash (register const char *str, register unsigned int len) { static const unsigned char asso_values[] = { - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 14, 10, - 15, 4, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, - 26, 26, 0, 0, 26, 26, 26, 0, 0, 26, - 0, 26, 26, 26, 5, 26, 26, 0, 26, 26, - 26, 26, 26, 26, 26, 0, 26, 26, 0, 0, - 26, 0, 26, 0, 26, 26, 26, 26, 26, 0, - 15, 0, 0, 26, 0, 0, 26, 0, 26, 26, - 0, 26, 26, 26, 26, 26, 26, 26, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 25, 10, + 15, 20, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 10, 0, 0, + 5, 36, 0, 0, 36, 36, 36, 0, 0, 36, + 0, 36, 0, 36, 0, 36, 36, 0, 36, 36, + 36, 36, 36, 36, 36, 0, 36, 0, 0, 0, + 10, 0, 36, 0, 0, 0, 36, 36, 36, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 36, 36, + 25, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 
36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36 }; register int hval = len; switch (hval) { default: + case 9: + hval += asso_values[(unsigned char)str[8]]; + case 8: + case 7: + case 6: case 5: - hval += asso_values[str[4]]; + hval += asso_values[(unsigned char)str[4]]; case 4: case 3: case 2: - hval += asso_values[str[1]]; + hval += asso_values[(unsigned char)str[1]]; case 1: - hval += asso_values[str[0]]; + hval += asso_values[(unsigned char)str[0]]; break; } - return hval + asso_values[str[len - 1]]; + return hval + asso_values[(unsigned char)str[len - 1]]; } #ifdef __GNUC__ -inline +__inline #endif const struct keyword_t * -charmap_hash (register const char *str, register int len) +charmap_hash (register const char *str, register unsigned int len) { static const struct keyword_t wordlist[] = { - {"",}, {"",}, {"",}, - {"END", tok_end, 0}, - {"",}, - {"WIDTH", tok_width, 0}, - {"",}, - {"CHARMAP", tok_charmap, 0}, - {"",}, - {"g3esc", tok_g3esc, 1}, - {"mb_cur_max", tok_mb_cur_max, 1}, - {"escape_char", tok_escape_char, 1}, - {"comment_char", tok_comment_char, 1}, - {"code_set_name", tok_code_set_name, 1}, - {"WIDTH_VARIABLE", tok_width_variable, 0}, - {"g1esc", tok_g1esc, 1}, - {"",}, {"",}, - {"WIDTH_DEFAULT", tok_width_default, 0}, - {"g0esc", tok_g0esc, 1}, - {"g2esc", tok_g2esc, 1}, - {"",}, {"",}, {"",}, {"",}, - {"mb_cur_min", tok_mb_cur_min, 1}, + {""}, {""}, {""}, + {"END", tok_end, 0}, + {""}, + {"WIDTH", tok_width, 0}, + {"escseq", tok_escseq, 1}, + {"include", tok_include, 1}, + {""}, {""}, + {"mb_cur_min", tok_mb_cur_min, 1}, + {"escape_char", tok_escape_char, 1}, + {"comment_char", tok_comment_char, 1}, + {"code_set_name", tok_code_set_name, 1}, + {"WIDTH_VARIABLE", tok_width_variable, 0}, + {"g1esc", tok_g1esc, 1}, + {"addset", 
tok_addset, 1}, + {"CHARMAP", tok_charmap, 0}, + {"WIDTH_DEFAULT", tok_width_default, 0}, + {""}, + {"g2esc", tok_g2esc, 1}, + {""}, {""}, {""}, {""}, + {"g3esc", tok_g3esc, 1}, + {""}, {""}, {""}, {""}, + {"g0esc", tok_g0esc, 1}, + {""}, {""}, {""}, {""}, + {"mb_cur_max", tok_mb_cur_max, 1} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) @@ -109,7 +131,7 @@ charmap_hash (register const char *str, register int len) { register const char *s = wordlist[key].name; - if (*s == *str && !strncmp (str + 1, s + 1, len - 1)) + if (*str == *s && !strncmp (str + 1, s + 1, len - 1)) return &wordlist[key]; } } diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c index fd9cc357e3..6db2b420a6 100644 --- a/locale/programs/charmap.c +++ b/locale/programs/charmap.c @@ -1,6 +1,6 @@ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper , 1996. + Contributed by Ulrich Drepper , 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -32,13 +33,10 @@ #include "error.h" #include "linereader.h" -#include "charset.h" +#include "charmap.h" #include "locfile.h" #include "repertoire.h" - -/* Uncomment following line for production version. */ -/* define NDEBUG 1 */ #include @@ -49,17 +47,20 @@ extern void *xmalloc (size_t __n); /* Prototypes for local functions. 
*/ -static struct charset_t *parse_charmap (const char *filename); -static void new_width (struct linereader *cmfile, struct charset_t *result, +static struct charmap_t *parse_charmap (const char *filename); +static void new_width (struct linereader *cmfile, struct charmap_t *result, const char *from, const char *to, unsigned long int width); +static void charmap_new_char (struct linereader *lr, struct charmap_t *cm, + int nbytes, char *bytes, const char *from, + const char *to, int decimal_ellipsis); -struct charset_t * +struct charmap_t * charmap_read (const char *filename) { const char *pathnfile; - struct charset_t *result = NULL; + struct charmap_t *result = NULL; if (filename != NULL) { @@ -175,16 +176,17 @@ charmap_read (const char *filename) } -static struct charset_t * +static struct charmap_t * parse_charmap (const char *filename) { struct linereader *cmfile; - struct charset_t *result; + struct charmap_t *result; int state; enum token_t expected_tok = tok_error; const char *expected_str = NULL; char *from_name = NULL; char *to_name = NULL; + enum token_t ellipsis = 0; /* Determine path. */ cmfile = lr_open (filename, charmap_hash); @@ -206,9 +208,12 @@ parse_charmap (const char *filename) return NULL; } + /* We don't want symbolic names in string to be translated. */ + cmfile->translate_strings = 0; + /* Allocate room for result. */ - result = (struct charset_t *) xmalloc (sizeof (struct charset_t)); - memset (result, '\0', sizeof (struct charset_t)); + result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t)); + memset (result, '\0', sizeof (struct charmap_t)); /* The default DEFAULT_WIDTH is 1. 
*/ result->width_default = 1; @@ -216,7 +221,8 @@ parse_charmap (const char *filename) #define obstack_chunk_free free obstack_init (&result->mem_pool); - if (init_hash (&result->char_table, 256)) + if (init_hash (&result->char_table, 256) + || init_hash (&result->byte_table, 256)) { free (result); return NULL; @@ -228,7 +234,7 @@ parse_charmap (const char *filename) while (1) { /* What's on? */ - struct token *now = lr_token (cmfile, NULL); + struct token *now = lr_token (cmfile, NULL, NULL); enum token_t nowtok = now->tok; struct token *arg; @@ -275,22 +281,24 @@ parse_charmap (const char *filename) && nowtok != tok_mb_cur_min && nowtok != tok_escape_char && nowtok != tok_comment_char && nowtok != tok_g0esc && nowtok != tok_g1esc && nowtok != tok_g2esc - && nowtok != tok_g3esc) + && nowtok != tok_g3esc && nowtok != tok_repertoiremap + && nowtok != tok_include) { lr_error (cmfile, _("syntax error in prolog: %s"), - _("illegal definition")); + _("invalid definition")); lr_ignore_rest (cmfile, 0); continue; } /* We know that we need an argument. 
*/ - arg = lr_token (cmfile, NULL); + arg = lr_token (cmfile, NULL, NULL); switch (nowtok) { case tok_code_set_name: - if (arg->tok != tok_ident && arg->tok != tok_string) + case tok_repertoiremap: + if (arg->tok != tok_ident) { badarg: lr_error (cmfile, _("syntax error in prolog: %s"), @@ -300,9 +308,14 @@ parse_charmap (const char *filename) continue; } - result->code_set_name = obstack_copy0 (&result->mem_pool, - arg->val.str.start, - arg->val.str.len); + if (nowtok == tok_code_set_name) + result->code_set_name = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); + else + result->repertoiremap = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); lr_ignore_rest (cmfile, 1); continue; @@ -312,12 +325,21 @@ parse_charmap (const char *filename) if (arg->tok != tok_number) goto badarg; - if (arg->val.num < 1 || arg->val.num > 4) + if (verbose + && ((nowtok == tok_mb_cur_max + && result->mb_cur_max != 0) + || (nowtok == tok_mb_cur_max + && result->mb_cur_max != 0))) + lr_error (cmfile, _("duplicate definition of <%s>"), + nowtok == tok_mb_cur_min + ? "mb_cur_min" : "mb_cur_max"); + + if (arg->val.num < 1) { lr_error (cmfile, - _("value for <%s> must lie between 1 and 4"), - nowtok == tok_mb_cur_min ? "mb_cur_min" - : "mb_cur_max"); + _("value for <%s> must be 1 or greater"), + nowtok == tok_mb_cur_min + ? 
"mb_cur_min" : "mb_cur_max"); lr_ignore_rest (cmfile, 0); continue; @@ -328,7 +350,8 @@ parse_charmap (const char *filename) && (int) arg->val.num > result->mb_cur_max)) { lr_error (cmfile, _("\ -value of must be greater than the value of ")); +value of <%s> must be greater or equal than the value of <%s>"), + "mb_cur_max", "mb_cur_min"); lr_ignore_rest (cmfile, 0); continue; @@ -347,7 +370,7 @@ value of must be greater than the value of ")); if (arg->tok != tok_ident) goto badarg; - if (arg->val.str.len != 1) + if (arg->val.str.lenmb != 1) { lr_error (cmfile, _("\ argument to <%s> must be a single character"), @@ -359,9 +382,9 @@ argument to <%s> must be a single character"), } if (nowtok == tok_escape_char) - cmfile->escape_char = *arg->val.str.start; + cmfile->escape_char = *arg->val.str.startmb; else - cmfile->comment_char = *arg->val.str.start; + cmfile->comment_char = *arg->val.str.startmb; lr_ignore_rest (cmfile, 1); continue; @@ -370,9 +393,15 @@ argument to <%s> must be a single character"), case tok_g1esc: case tok_g2esc: case tok_g3esc: + case tok_escseq: lr_ignore_rest (cmfile, 0); /* XXX */ continue; + case tok_include: + lr_error (cmfile, _("\ +character sets with locking states are not supported")); + exit (4); + default: /* Cannot happen. */ assert (! "Should not happen"); @@ -409,8 +438,8 @@ argument to <%s> must be a single character"), obstack_free (&result->mem_pool, from_name); from_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); to_name = NULL; state = 3; @@ -419,19 +448,20 @@ argument to <%s> must be a single character"), case 3: /* We have two possibilities: We can see an ellipsis or an encoding value. 
*/ - if (nowtok == tok_ellipsis) + if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 + || nowtok == tok_ellipsis2) { + ellipsis = nowtok; state = 4; continue; } /* FALLTHROUGH */ case 5: - if (nowtok != tok_charcode && nowtok != tok_ucs2 - && nowtok != tok_ucs4) + if (nowtok != tok_charcode) { lr_error (cmfile, _("syntax error in %s definition: %s"), - "CHARMAP", _("illegal encoding given")); + "CHARMAP", _("invalid encoding given")); lr_ignore_rest (cmfile, 0); @@ -444,9 +474,9 @@ argument to <%s> must be a single character"), else if (now->val.charcode.nbytes > result->mb_cur_max) lr_error (cmfile, _("too many bytes in character encoding")); else - charset_new_char (cmfile, &result->char_table, - now->val.charcode.nbytes, - now->val.charcode.val, from_name, to_name); + charmap_new_char (cmfile, result, now->val.charcode.nbytes, + now->val.charcode.bytes, from_name, to_name, + ellipsis != tok_ellipsis2); /* Ignore trailing comment silently. */ lr_ignore_rest (cmfile, 0); @@ -470,8 +500,8 @@ argument to <%s> must be a single character"), /* Copy the to-name in a safe place. 
*/ to_name = (char *) obstack_copy0 (&result->mem_pool, - cmfile->token.val.str.start, - cmfile->token.val.str.len); + cmfile->token.val.str.startmb, + cmfile->token.val.str.lenmb); state = 5; continue; @@ -557,15 +587,15 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); obstack_free (&result->mem_pool, from_name); from_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); to_name = NULL; state = 94; continue; case 94: - if (nowtok == tok_ellipsis) + if (nowtok == tok_ellipsis3) { state = 95; continue; @@ -602,8 +632,8 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); } to_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); state = 96; continue; @@ -637,15 +667,15 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); obstack_free (&result->mem_pool, from_name); from_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); to_name = NULL; state = 99; continue; case 99: - if (nowtok == tok_ellipsis) + if (nowtok == tok_ellipsis3) state = 100; /* Store info. */ @@ -663,8 +693,8 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); else { to_name = (char *) obstack_copy0 (&result->mem_pool, - now->val.str.start, - now->val.str.len); + now->val.str.startmb, + now->val.str.lenmb); /* XXX Enter value into table. 
*/ } @@ -690,13 +720,14 @@ only WIDTH definitions are allowed to follow the CHARMAP definition")); static void -new_width (struct linereader *cmfile, struct charset_t *result, +new_width (struct linereader *cmfile, struct charmap_t *result, const char *from, const char *to, unsigned long int width) { - unsigned int from_val, to_val; + struct charseq *from_val; + struct charseq *to_val; - from_val = charset_find_value (&result->char_table, from, strlen (from)); - if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE) + from_val = charmap_find_value (result, from, strlen (from)); + if (from_val == NULL) { lr_error (cmfile, _("unknown character `%s'"), from); return; @@ -706,8 +737,8 @@ new_width (struct linereader *cmfile, struct charset_t *result, to_val = from_val; else { - to_val = charset_find_value (&result->char_table, to, strlen (to)); - if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE) + to_val = charmap_find_value (result, to, strlen (to)); + if (to_val == NULL) { lr_error (cmfile, _("unknown character `%s'"), to); return; @@ -734,3 +765,140 @@ new_width (struct linereader *cmfile, struct charset_t *result, result->width_rules[result->nwidth_rules].width = (unsigned int) width; ++result->nwidth_rules; } + + +struct charseq * +charmap_find_value (const struct charmap_t *cm, const char *name, size_t len) +{ + void *result; + + return (find_entry ((hash_table *) &cm->char_table, name, len, &result) + < 0 ? 
NULL : (struct charseq *) result); +} + + +static void +charmap_new_char (struct linereader *lr, struct charmap_t *cm, + int nbytes, char *bytes, const char *from, const char *to, + int decimal_ellipsis) +{ + hash_table *ht = &cm->char_table; + hash_table *bt = &cm->byte_table; + struct obstack *ob = &cm->mem_pool; + char *from_end; + char *to_end; + const char *cp; + int prefix_len, len1, len2; + unsigned int from_nr, to_nr, cnt; + struct charseq *newp; + + len1 = strlen (from); + + if (to == NULL) + { + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = obstack_copy (ob, from, len1 + 1); + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + + insert_entry (ht, from, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note that it isn't a bug if a symbol is defined more + than once. All later definitions are simply discarded. */ + return; + } + + /* We have a range: the names must have names with equal prefixes + and an equal number of digits, where the second number is greater + or equal than the first. */ + len2 = strlen (to); + + if (len1 != len2) + { + illegal_range: + lr_error (lr, _("invalid names for character range")); + return; + } + + cp = &from[len1 - 1]; + if (decimal_ellipsis) + while (isdigit (*cp) && cp >= from) + --cp; + else + while (isxdigit (*cp) && cp >= from) + { + if (!isdigit (*cp) && !isupper (*cp)) + lr_error (lr, _("\ +hexadecimal range format should use only capital characters")); + --cp; + } + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto illegal_range; + + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); + if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, + decimal_ellipsis ? 
10 : 16)) == ULONG_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are illegal names for range")); + return; + } + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is not higher then lower limit")); + return; + } + + for (cnt = from_nr; cnt <= to_nr; ++cnt) + { + char *name_end; + obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X", + prefix_len, from, len1 - prefix_len, cnt); + name_end = obstack_finish (ob); + + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = name_end; + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + + insert_entry (ht, name_end, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note we don't examine the return value since it is no error + if we have two definitions for a symbol. */ + + /* Increment the value in the byte sequence. */ + if (++bytes[nbytes - 1] == '\0') + { + int b = nbytes - 2; + + do + if (b < 0) + { + lr_error (lr, + _("resulting bytes for range not representable.")); + return; + } + while (++bytes[b--] == 0); + } + } +} + + +struct charseq * +charmap_find_symbol (const struct charmap_t *cm, const char *bytes, + size_t nbytes) +{ + void *result; + + return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result) + < 0 ? NULL : (struct charseq *) result); +} diff --git a/locale/programs/charmap.h b/locale/programs/charmap.h new file mode 100644 index 0000000000..88fd078ed9 --- /dev/null +++ b/locale/programs/charmap.h @@ -0,0 +1,78 @@ +/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1996. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifndef _CHARMAP_H +#define _CHARMAP_H + +#include + +#include "repertoire.h" +#include "simple-hash.h" + + +struct width_rule +{ + struct charseq *from; + struct charseq *to; + unsigned int width; +}; + + +struct charmap_t +{ + const char *code_set_name; + const char *repertoiremap; + int mb_cur_min; + int mb_cur_max; + + struct width_rule *width_rules; + size_t nwidth_rules; + size_t nwidth_rules_max; + unsigned int width_default; + + struct obstack mem_pool; + hash_table char_table; + hash_table byte_table; + hash_table ucs4_table; +}; + + +/* This is the structure used for entries in the hash table. It represents + the sequence of bytes used for the coded character. */ +struct charseq +{ + const char *name; + uint32_t ucs4; + int nbytes; + unsigned char bytes[0]; +}; + + +/* Prototypes for charmap handling functions. */ +extern struct charmap_t *charmap_read (const char *filename); + +/* Return the value stored under the given key in the hashing table. */ +extern struct charseq *charmap_find_value (const struct charmap_t *charmap, + const char *name, size_t len); + +/* Return symbol for given multibyte sequence. 
*/ +extern struct charseq *charmap_find_symbol (const struct charmap_t *charmap, + const char *name, size_t len); + +#endif /* charmap.h */ diff --git a/locale/programs/charset.h b/locale/programs/charset.h deleted file mode 100644 index 8f066b115b..0000000000 --- a/locale/programs/charset.h +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper , 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#ifndef _CHARSET_H -#define _CHARSET_H - -#include - -#include "repertoire.h" -#include "simple-hash.h" -#include "linereader.h" - - -struct width_rule -{ - unsigned int from; - unsigned int to; - unsigned int width; -}; - - -struct charset_t -{ - const char *code_set_name; - int mb_cur_min; - int mb_cur_max; - - struct width_rule *width_rules; - size_t nwidth_rules; - size_t nwidth_rules_max; - unsigned int width_default; - - struct obstack mem_pool; - hash_table char_table; -}; - - -/* We need one value to mark the error case. Let's use 0xffffffff. - I.e., it is placed in the last page of ISO 10646. For now only the - first is used and we have plenty of room. 
*/ -#define ILLEGAL_CHAR_VALUE ((wchar_t) 0xffffffffu) - - -/* Declared in localedef.c. */ -extern int be_quiet; - -/* Prototypes for charmap handling functions. */ -struct charset_t *charmap_read (const char *filename); - -/* Prototypes for function to insert new character. */ -void charset_new_char (struct linereader *lr, hash_table *ht, int bytes, - unsigned int value, const char *from, const char *to); - -/* Return the value stored under the given key in the hashing table. */ -unsigned int charset_find_value (const hash_table *ht, - const char *name, size_t len); - -#endif /* charset.h */ diff --git a/locale/programs/config.h b/locale/programs/config.h index 9775572849..a293da3b09 100644 --- a/locale/programs/config.h +++ b/locale/programs/config.h @@ -1,5 +1,25 @@ +/* Configuration for localedef program. + Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + #ifndef _LD_CONFIG_H -#define _LD_CONFIG_H +#define _LD_CONFIG_H 1 /* Use the internal textdomain used for libc messages. 
*/ #define PACKAGE _libc_intl_domainname @@ -18,12 +38,8 @@ # endif #endif - - -#define HAVE_VPRINTF 1 -#define HAVE_STRING_H 1 - +/* This must be one higer than the last used LC_xxx category value. */ +#define __LC_LAST 13 #include_next - #endif diff --git a/locale/programs/ld-address.c b/locale/programs/ld-address.c new file mode 100644 index 0000000000..805330cfaf --- /dev/null +++ b/locale/programs/ld-address.c @@ -0,0 +1,514 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include +#include + +#include + +#include "localeinfo.h" +#include "locfile.h" + + +static struct +{ + const char ab2[2]; + const char ab3[3]; + uint32_t num; +} iso3166[] = +{ +#define DEFINE_COUNTRY_CODE(Name, Ab2, Ab3, Num) \ + { #Ab2, #Ab3, Num }, +#include "iso-3166.def" +}; + + +static struct +{ + const char ab[2]; + const char term[3]; + const char lib[3]; +} iso639[] = +{ +#define DEFINE_LANGUAGE_CODE(Name, Ab, Term, Lib) \ + { #Ab, #Term, #Lib }, +#include "iso-639.def" +}; + + +/* The real definition of the struct for the LC_ADDRESS locale. 
*/ +struct locale_address_t +{ + const char *postal_fmt; + const char *country_name; + const char *country_post; + const char *country_ab2; + const char *country_ab3; + uint32_t country_num; + uint32_t country_num_ob; + const char *country_car; + const char *country_isbn; + const char *lang_name; + const char *lang_ab; + const char *lang_term; + const char *lang_lib; +}; + + +static void +address_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_ADDRESS].address = + (struct locale_address_t *) xcalloc (1, + sizeof (struct locale_address_t)); + + lr->translate_strings = 1; + lr->return_widestr = 0; +} + + +void +address_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + size_t cnt; + int helper; + + if (address->postal_fmt == NULL) + { + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "postal_fmt"); + /* Use as the default value the value of the i18n locale. */ + address->postal_fmt = "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = address->postal_fmt; + + if (*cp == '\0') + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "postal_fmt"); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (*++cp == 'R') + /* Romanize-flag. */ + ++cp; + if (strchr ("afdbshNtreCzTc%", *cp) == NULL) + { + error (0, 0, _("\ +%s: invalid escape `%%%c' sequence in field `%s'"), + "LC_ADDRESS", *cp, "postal_fmt"); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (address->cat == NULL) \ + { \ + if (verbose) \ + error (0, 0, _("%s: field `%s' not defined"), "LC_ADDRESS", #cat); \ + address->cat = ""; \ + } + + TEST_ELEM (country_name); + /* XXX Test against list of defined codes. 
*/ + TEST_ELEM (country_post); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_car); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_isbn); + TEST_ELEM (lang_name); + + helper = 1; + if (address->lang_term == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), "LC_ADDRESS", + "lang_term"); + address->lang_term = ""; + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else if (address->lang_term[0] == '\0') + { + if (verbose) + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_term"); + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else + { + /* Look for this language in the table. */ + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_term, iso639[cnt].term) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + error (0, 0, _("\ +%s: terminology language code `%s' not defined"), + "LC_ADDRESS", address->lang_term); + } + + if (address->lang_ab == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), "LC_ADDRESS", "lang_ab"); + address->lang_ab = ""; + } + else if (address->lang_ab[0] == '\0') + { + if (verbose) + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_ab"); + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + helper = 2; + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_ab, iso639[cnt].ab) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_ab); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0) + error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "lang_ab", "lang_term"); + } + + if (address->lang_lib == NULL) + /* This is no error. 
*/ + address->lang_lib = address->lang_term; + else if (address->lang_lib[0] == '\0') + { + if (verbose) + error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_lib"); + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_lib, iso639[cnt].lib) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_lib); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0) + error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), "LC_ADDRESS", "lang_lib", + helper == 1 ? "lang_term" : "lang_ab"); + } + + if (address->country_num == 0) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_num"); + cnt = sizeof (iso3166) / sizeof (iso3166[0]); + } + else + { + for (cnt = 0; cnt < sizeof (iso3166) / sizeof (iso3166[0]); ++cnt) + if (address->country_num == iso3166[cnt].num) + break; + + if (cnt == sizeof (iso3166) / sizeof (iso3166[0])) + error (0, 0, _("\ +%s: numeric country code `%d' not valid"), + "LC_ADDRESS", address->country_num); + } + address->country_num_ob = bswap_32 (address->country_num); + + if (address->country_ab2 == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab2"); + address->country_ab2 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab2, iso3166[cnt].ab2) != 0) + error (0, 0, _("%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab2", "country_num"); + + if (address->country_ab3 == NULL) + { + if (verbose) + error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab3"); + address->country_ab3 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab3, iso3166[cnt].ab3) != 0) + error (0, 0, _("%s: 
`%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab3", "country_num"); +} + + +void +address_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + struct iovec iov[2 + _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)]; + struct locale_file data; + uint32_t idx[_NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)]; + size_t cnt = 0; + + data.magic = LIMAGIC (LC_ADDRESS); + data.n = _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS); + iov[cnt].iov_base = (void *) &data; + iov[cnt].iov_len = sizeof (data); + ++cnt; + + iov[cnt].iov_base = (void *) idx; + iov[cnt].iov_len = sizeof (idx); + ++cnt; + + idx[cnt - 2] = iov[0].iov_len + iov[1].iov_len; + iov[cnt].iov_base = (void *) address->postal_fmt; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_name; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_post; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_ab2; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_ab3; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_car; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define country_num_eb country_num_ob +# define country_num_el country_num +#else +# define country_num_eb country_num +# define country_num_el country_num_ob +#endif + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = 
(void *) &address->country_num_eb; /* Point at the stored number; the value itself is not an address the writer could read from. */ + iov[cnt].iov_len = sizeof (uint32_t); + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) &address->country_num_el; /* Same here: pass the address of the field. */ + iov[cnt].iov_len = sizeof (uint32_t); + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->country_isbn; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_name; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_ab; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_term; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + idx[cnt - 2] = idx[cnt - 3] + iov[cnt - 1].iov_len; + iov[cnt].iov_base = (void *) address->lang_lib; + iov[cnt].iov_len = strlen (iov[cnt].iov_base) + 1; + ++cnt; + + assert (cnt == 2 + _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)); + + write_locale_data (output_path, "LC_ADDRESS", + 2 + _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS), iov); +} + + +/* The parser for the LC_ADDRESS section of the locale definition. */ +void +address_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_address_t *address; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_ADDRESS' must be free. 
*/ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_address, LC_ADDRESS, + "LC_ADDRESS", ignore_content); + return; + } + + /* Prepare the data structures. address_startup allocates the category data only when IGNORE_CONTENT is zero. */ + address_startup (ldfile, result, ignore_content); + address = result->categories[LC_ADDRESS].address; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { /* Each STR_ELEM/INT_ELEM case reads one argument token for the field of the same name and reports a field that is given twice. */ +#define STR_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (address->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_ADDRESS", #cat); \ + address->cat = ""; \ + } \ + else if (!ignore_content) \ + address->cat = arg->val.str.startmb; \ + break + + STR_ELEM (postal_fmt); + STR_ELEM (country_name); + STR_ELEM (country_post); + STR_ELEM (country_ab2); + STR_ELEM (country_ab3); + STR_ELEM (country_car); + STR_ELEM (country_isbn); + STR_ELEM (lang_name); + STR_ELEM (lang_ab); + STR_ELEM (lang_term); + STR_ELEM (lang_lib); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + arg = lr_token (ldfile, charmap, NULL); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (address->cat != 0) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \ + else if (!ignore_content) \ + address->cat = arg->val.num; \ + break + + INT_ELEM (country_num); + + case tok_end: + /* Next we assume `LC_ADDRESS'. 
*/ + arg = lr_token (ldfile, charmap, NULL); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_ADDRESS"); + else if (arg->tok != tok_lc_address) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_ADDRESS"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_address); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_ADDRESS"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_ADDRESS"); +} diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index 265bfd0af1..3c1267420c 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -1,6 +1,6 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper , 1995. + Contributed by Ulrich Drepper , 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -21,32 +21,1034 @@ # include #endif -#include -#include -#include -#include -#include +#include #include -#include -#include -#include +#include "charmap.h" #include "localeinfo.h" -#include "locales.h" -#include "simple-hash.h" -#include "stringtrans.h" -#include "strlen-hash.h" +#include "linereader.h" +#include "locfile.h" +#include "localedef.h" /* Uncomment the following line in the production version. */ /* #define NDEBUG 1 */ #include +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +/* Forward declaration. */ +struct element_t; + +/* Data type for list of strings. */ +struct section_list +{ + struct section_list *next; + /* Name of the section. */ + const char *name; + /* First element of this section. 
*/ + struct element_t *first; + /* Last element of this section. */ + struct element_t *last; + /* These are the rules for this section. */ + enum coll_sort_rule *rules; +}; + +/* Data type for collating element. */ +struct element_t +{ + const char *mbs; + const uint32_t *wcs; + int order; + + struct element_t **weights; + + /* Where does the definition come from. */ + const char *file; + size_t line; + + /* Which section does this belong to. */ + struct section_list *section; + + /* Predecessor and successor in the order list. */ + struct element_t *last; + struct element_t *next; +}; + +/* Data type for collating symbol. */ +struct symbol_t +{ + /* Point to place in the order list. */ + struct element_t *order; + + /* Where does the definition come from. */ + const char *file; + size_t line; +}; + + +/* The real definition of the struct for the LC_COLLATE locale. */ +struct locale_collate_t +{ + int col_weight_max; + int cur_weight_max; + + /* List of known scripts. */ + struct section_list *sections; + /* Current section using definition. */ + struct section_list *current_section; + /* There always can be an unnamed section. */ + struct section_list unnamed_section; + /* To make handling of errors easier we have another section. */ + struct section_list error_section; + + /* Number of sorting rules given in order_start line. */ + uint32_t nrules; + + /* Start of the order list. */ + struct element_t *start; + + /* The undefined element. */ + struct element_t undefined; -#define MAX(a, b) ((a) > (b) ? (a) : (b)) + /* This is the cursor for `reorder_after' insertions. */ + struct element_t *cursor; -#define SWAPU32(w) \ - (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) + /* Remember whether last weight was an ellipsis. */ + int was_ellipsis; + + /* Known collating elements. */ + hash_table elem_table; + + /* Known collating symbols. */ + hash_table sym_table; + + /* Known collation sequences. 
*/ + hash_table seq_table; + + struct obstack mempool; + + /* The LC_COLLATE category is a bit special as it is sometimes possible + that the definitions from more than one input file contains information. + Therefore we keep all relevant input in a list. */ + struct locale_collate_t *next; +}; + + +/* We have a few global variables which are used for reading all + LC_COLLATE category descriptions in all files. */ +static int nrules; + + +static struct section_list * +make_seclist_elem (struct locale_collate_t *collate, const char *string, + struct section_list *next) +{ + struct section_list *newp; + + newp = (struct section_list *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->next = next; + newp->name = string; + newp->first = NULL; + + return newp; +} + + +static struct element_t * +new_element (struct locale_collate_t *collate, const char *mbs, + const uint32_t *wcs) +{ + struct element_t *newp; + + newp = (struct element_t *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->mbs = mbs; + newp->wcs = wcs; + newp->order = 0; + + newp->file = NULL; + newp->line = 0; + + newp->section = NULL; + + newp->last = NULL; + newp->next = NULL; + + return newp; +} + + +static struct symbol_t * +new_symbol (struct locale_collate_t *collate) +{ + struct symbol_t *newp; + newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); + + newp->order = NULL; + + newp->file = NULL; + newp->line = 0; + + return newp; +} + + +/* Test whether this name is already defined somewhere. 
*/ +static int +check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate, + struct charmap_t *charmap, struct repertoire_t *repertoire, + const char *symbol, size_t symbol_len) +{ + void *ignore = NULL; + + if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined in charmap"), symbol); + return 1; + } + + if (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined in repertoire"), symbol); + return 1; + } + + if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol); + return 1; + } + + if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%s' already defined as collating element"), + symbol); + return 1; + } + + return 0; +} + + +/* Read the direction specification. */ +static void +read_directions (struct linereader *ldfile, struct token *arg, + struct charmap_t *charmap, struct repertoire_t *repertoire, + struct locale_collate_t *collate) +{ + int cnt = 0; + int max = nrules ?: 10; + enum coll_sort_rule *rules = calloc (max, sizeof (*rules)); + int warned = 0; + + while (1) + { + int valid = 0; + + if (arg->tok == tok_forward) + { + if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d"), + "LC_COLLATE", "forward", cnt + 1); + } + } + else + rules[cnt] |= sort_forward; + + valid = 1; + } + else if (arg->tok == tok_backward) + { + if (rules[cnt] & sort_forward) + { + if (! 
warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d"), + "LC_COLLATE", "backward", cnt + 1); + } + } + else + rules[cnt] |= sort_backward; + + valid = 1; + } + else if (arg->tok == tok_position) + { + if (rules[cnt] & sort_position) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned twice in definition of weight %d in category `%s'"), + "LC_COLLATE", "position", cnt + 1); + } + } + else + rules[cnt] |= sort_position; + + valid = 1; + } + + if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma + || arg->tok == tok_semicolon) + { + if (! valid && ! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + + /* See whether we have to increment the counter. */ + if (arg->tok != tok_comma && rules[cnt] != 0) + ++cnt; + + if (arg->tok == tok_eof || arg->tok == tok_eol) + /* End of line or file, so we exit the loop. */ + break; + + if (nrules == 0) + { + /* See whether we have enough room in the array. */ + if (cnt == max) + { + max += 10; + rules = (enum coll_sort_rule *) xrealloc (rules, + max + * sizeof (*rules)); + memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules)); + } + } + else + { + if (cnt == nrules) + { + /* There must not be any more rule. */ + if (! warned) + { + lr_error (ldfile, _("\ +%s: too many rules; first entry only had %d"), + "LC_COLLATE", nrules); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + break; + } + } + } + else + { + if (! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + } + + arg = lr_token (ldfile, charmap, repertoire); + } + + if (nrules == 0) + { + /* Now we know how many rules we have. 
*/ + nrules = cnt; + rules = (enum coll_sort_rule *) xrealloc (rules, + nrules * sizeof (*rules)); + } + else + { + if (cnt < nrules) + { + /* Not enough rules in this specification. */ + if (! warned) + lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE"); + + do + rules[cnt] = sort_forward; + while (++cnt < nrules); + } + } + + collate->current_section->rules = rules; +} + + +static void +insert_value (struct linereader *ldfile, struct token *arg, + struct charmap_t *charmap, struct repertoire_t *repertoire, + struct locale_collate_t *collate) +{ + /* First find out what kind of symbol this is. */ + struct charseq *seq; + uint32_t wc; + struct element_t *elem = NULL; + int weight_cnt; + + /* First determine the wide character. There must be such a value, + otherwise we ignore it (if it is no collation symbol or element). */ + wc = repertoire_find_value (repertoire, arg->val.str.startmb, + arg->val.str.lenmb); + + /* Try to find the character in the charmap. */ + seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb); + + if (wc == ILLEGAL_CHAR_VALUE) + { + /* It's no character, so look through the collation elements and + symbol list. */ + void *result; + + if (find_entry (&collate->sym_table, arg->val.str.startmb, + arg->val.str.lenmb, &result) == 0) + { + /* It's a collation symbol. */ + struct symbol_t *sym = (struct symbol_t *) result; + elem = sym->order; + } + else if (find_entry (&collate->elem_table, arg->val.str.startmb, + arg->val.str.lenmb, &result) != 0) + /* It's also no collation element. Therefore ignore it. */ + return; + } + + /* XXX elem must be defined. Plain characters and collating elements are not given an element above yet, so stop here instead of dereferencing a null pointer. */ + if (elem == NULL) + return; + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL) + { + /* `%.*s' expects the int precision before the string, and the size_t line number needs the complete `%Zu' conversion. */ + lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"), + (int) arg->val.str.lenmb, arg->val.str.startmb, + elem->file, elem->line); + return; + } + + /* Initialize all the fields. 
*/ + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + elem->last = collate->cursor; + elem->next = collate->cursor ? collate->cursor->next : NULL; + elem->weights = (struct element_t **) + obstack_alloc (&collate->mempool, nrules * sizeof (struct element_t *)); + memset (elem->weights, '\0', nrules * sizeof (struct element_t *)); + + if (collate->current_section->first == NULL) + collate->current_section->first = elem; + if (collate->current_section->last == collate->cursor) + collate->current_section->last = elem; + + collate->cursor = elem; + + /* Now read the rest of the line. */ + ldfile->return_widestr = 1; + + weight_cnt = 0; + do + { + arg = lr_token (ldfile, charmap, repertoire); + + if (arg->tok == tok_eof || arg->tok == tok_eol) + { + /* This means the rest of the line uses the current element + as the weight. */ + do + elem->weights[weight_cnt] = elem; + while (++weight_cnt < nrules); + + return; + } + + if (arg->tok == tok_ignore) + { + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. */ + } + else if (arg->tok == tok_bsymbol) + { + + } + } + while (++weight_cnt < nrules); + + lr_ignore_rest (ldfile, weight_cnt == nrules); +} + + +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + struct locale_collate_t *collate; + + collate = locale->categories[LC_COLLATE].collate = + (struct locale_collate_t *) xcalloc (1, + sizeof (struct locale_collate_t)); + + /* Init the various data structures. 
*/ + init_hash (&collate->elem_table, 100); + init_hash (&collate->sym_table, 100); + init_hash (&collate->seq_table, 500); + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; + } + + ldfile->translate_strings = 1; + ldfile->return_widestr = 0; +} + + +void +collate_finish (struct localedef_t *locale, struct charmap_t *charmap) +{ +} + + +void +collate_output (struct localedef_t *locale, struct charmap_t *charmap, + const char *output_path) +{ +} + + +void +collate_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_collate_t *collate; + struct token *now; + struct token *arg; + enum token_t nowtok; + int state = 0; + int