diff options
| author | Ulrich Drepper <drepper@redhat.com> | 1998-04-30 16:57:48 +0000 |
|---|---|---|
| committer | Ulrich Drepper <drepper@redhat.com> | 1998-04-30 16:57:48 +0000 |
| commit | 69f155d4fc11f2f0e1dd1bfcd804192303ba1627 (patch) | |
| tree | ced40eb216ea58b252952737a6747f19d1db8e4f /locale/programs | |
| parent | 3dd2c3e24702b91473b30d5a0baf9954c37fd35b (diff) | |
| download | glibc-69f155d4fc11f2f0e1dd1bfcd804192303ba1627.tar.xz glibc-69f155d4fc11f2f0e1dd1bfcd804192303ba1627.zip | |
Update.
1998-04-30 16:45 Ulrich Drepper <drepper@cygnus.com>
* inet/ether_aton.c: Including netinet/if_ether.h is not necessary.
* locale/Makefile (distribute): Add programs/repertoire.h.
(localedef-modules): Add repertoire.
(CPPFLAGS): Define REPERTOIREMAP_PATH.
* locale/programs/repertoire.c: New file.
* locale/programs/repertoire.h: New file.
* locale/programs/charmap.c: Start fixing character set handling
to handle multi-byte encodings.
* locale/programs/charset.c: Likewise.
* locale/programs/charset.h: Likewise.
* locale/programs/ld-collate.c: Likewise.
* locale/programs/ld-ctype.c: Likewise.
* locale/programs/linereader.c: Likewise.
* locale/programs/localedef.c: Likewise.
* locale/programs/locfile-kw.gperf: Likewise.
* locale/programs/locfile-kw.h: Likewise.
* locale/programs/locfile-token.h: Likewise.
* locale/programs/locfile.h: Likewise.
* locale/programs/stringtrans.c: Likewise.
1998-04-18 Philip Blundell <Philip.Blundell@pobox.com>
* sysdeps/arm/memset.S: Fix off by one error.
* sysdeps/unix/sysv/linux/arm/sysdep.h (PSEUDO): On error, call
__syscall_error rather than syscall_error directly.
1998-04-17 Philip Blundell <Philip.Blundell@pobox.com>
* sysdeps/unix/sysv/linux/arm/mmap.S: New file; implementation of
mmap() syscall for ARM.
* sysdeps/unix/arm/start.c: New file; startup code for ARM a.out
binaries.
Diffstat (limited to 'locale/programs')
| -rw-r--r-- | locale/programs/charmap.c | 50 | ||||
| -rw-r--r-- | locale/programs/charset.c | 59 | ||||
| -rw-r--r-- | locale/programs/charset.h | 17 | ||||
| -rw-r--r-- | locale/programs/ld-collate.c | 16 | ||||
| -rw-r--r-- | locale/programs/ld-ctype.c | 38 | ||||
| -rw-r--r-- | locale/programs/linereader.c | 2 | ||||
| -rw-r--r-- | locale/programs/localedef.c | 17 | ||||
| -rw-r--r-- | locale/programs/locfile-kw.gperf | 3 | ||||
| -rw-r--r-- | locale/programs/locfile-kw.h | 160 | ||||
| -rw-r--r-- | locale/programs/locfile-token.h | 5 | ||||
| -rw-r--r-- | locale/programs/locfile.h | 3 | ||||
| -rw-r--r-- | locale/programs/repertoire.c | 323 | ||||
| -rw-r--r-- | locale/programs/repertoire.h | 38 | ||||
| -rw-r--r-- | locale/programs/stringtrans.c | 5 |
14 files changed, 565 insertions, 171 deletions
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c index 0cd62fbfd4..7114a237a0 100644 --- a/locale/programs/charmap.c +++ b/locale/programs/charmap.c @@ -33,6 +33,8 @@ #include "error.h" #include "linereader.h" #include "charset.h" +#include "locfile.h" +#include "repertoire.h" /* Uncomment following line for production version. */ @@ -209,6 +211,8 @@ parse_charmap (const char *filename) memset (result, '\0', sizeof (struct charset_t)); /* The default DEFAULT_WIDTH is 1. */ result->width_default = 1; + /* Let the user overwrite the repertoire map we use. */ + result->repertoiremap = repertoiremap; #define obstack_chunk_alloc malloc #define obstack_chunk_free free @@ -265,6 +269,17 @@ parse_charmap (const char *filename) lr_ignore_rest (cmfile, 1); + /* Read the repertoire map now. */ + if (result->repertoiremap == NULL) + /* This is fatal. */ + error (4, 0, _("no repertoire map specified: cannot proceed")); + + result->repertoire = repertoire_read (result->repertoiremap); + if (result->repertoire == NULL) + /* This is also fatal. 
*/ + error (4, errno, _("cannot read repertoire map `%s'"), + result->repertoiremap); + state = 2; continue; } @@ -273,7 +288,7 @@ parse_charmap (const char *filename) && nowtok != tok_mb_cur_min && nowtok != tok_escape_char && nowtok != tok_comment_char && nowtok != tok_g0esc && nowtok != tok_g1esc && nowtok != tok_g2esc - && nowtok != tok_g3esc) + && nowtok != tok_g3esc && nowtok != tok_repertoiremap) { lr_error (cmfile, _("syntax error in prolog: %s"), _("illegal definition")); @@ -305,6 +320,18 @@ parse_charmap (const char *filename) lr_ignore_rest (cmfile, 1); continue; + case tok_repertoiremap: + if (arg->tok != tok_ident) + goto badarg; + + if (result->repertoiremap == NULL) + result->repertoiremap = obstack_copy0 (&result->mem_pool, + arg->val.str.start, + arg->val.str.len); + + lr_ignore_rest (cmfile, 1); + continue; + case tok_mb_cur_max: case tok_mb_cur_min: if (arg->tok != tok_number) @@ -437,14 +464,14 @@ argument to <%s> must be a single character"), continue; } - if (nowtok == tok_charcode) - /* Write char value in table. */ - charset_new_char (cmfile, result, now->val.charcode.nbytes, - now->val.charcode.val, from_name, to_name); + if (now->val.charcode.nbytes < result->mb_cur_min) + lr_error (cmfile, _("too few bytes in character encoding")); + else if (now->val.charcode.nbytes > result->mb_cur_max) + lr_error (cmfile, _("too many bytes in character encoding")); else - /* Determine ISO 10646 value and write into table. */ - charset_new_unicode (cmfile, result, now->val.charcode.nbytes, - now->val.charcode.val, from_name, to_name); + charset_new_char (cmfile, &result->char_table, + now->val.charcode.nbytes, + now->val.charcode.val, from_name, to_name); /* Ignore trailing comment silently. */ lr_ignore_rest (cmfile, 0); @@ -466,8 +493,7 @@ argument to <%s> must be a single character"), continue; } - /* If the previous line was not completely correct free the - used memory. */ + /* Copy the to-name in a safe place. 
*/ to_name = (char *) obstack_copy0 (&result->mem_pool, cmfile->token.val.str.start, cmfile->token.val.str.len); @@ -694,7 +720,7 @@ new_width (struct linereader *cmfile, struct charset_t *result, { unsigned int from_val, to_val; - from_val = charset_find_value (result, from, strlen (from)); + from_val = charset_find_value (&result->char_table, from, strlen (from)); if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE) { lr_error (cmfile, _("unknown character `%s'"), from); @@ -705,7 +731,7 @@ new_width (struct linereader *cmfile, struct charset_t *result, to_val = from_val; else { - to_val = charset_find_value (result, to, strlen (to)); + to_val = charset_find_value (&result->char_table, to, strlen (to)); if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE) { lr_error (cmfile, _("unknown character `%s'"), to); diff --git a/locale/programs/charset.c b/locale/programs/charset.c index fdacf25328..767fafb93a 100644 --- a/locale/programs/charset.c +++ b/locale/programs/charset.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. 
@@ -23,6 +23,8 @@ #include <alloca.h> #include <ctype.h> +#include <errno.h> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -31,49 +33,24 @@ #include "charset.h" -static void -insert_char (struct linereader *lr, struct charset_t *cs, int bytes, - unsigned int value, const char *from, const char *to); - - -void -charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes, - unsigned int value, const char *from, const char *to) -{ - if (bytes < cs->mb_cur_min) - lr_error (lr, _("too few bytes in character encoding")); - else if (bytes > cs->mb_cur_max) - lr_error (lr, _("too many bytes in character encoding")); - else - insert_char (lr, cs, bytes, value, from, to); -} - - -void -charset_new_unicode (struct linereader *lr, struct charset_t *cs, int bytes, - unsigned int value, const char *from, const char *to) -{ - /* For now: perhaps <Uxxxx> support will be removed again... */ - insert_char (lr, cs, bytes, value, from, to); -} - - unsigned int -charset_find_value (const struct charset_t *cs, const char *name, size_t len) +charset_find_value (const hash_table *ht, const char *name, size_t len) { void *result; - if (find_entry ((hash_table *) &cs->char_table, name, len, &result) < 0) + if (find_entry ((hash_table *) ht, name, len, &result) < 0) return ILLEGAL_CHAR_VALUE; return (unsigned int) ((unsigned long int) result); } -static void -insert_char (struct linereader *lr, struct charset_t *cs, int bytes, - unsigned int value, const char *from, const char *to) +void +charset_new_char (struct linereader *lr, hash_table *ht, int bytes, + unsigned int value, const char *from, const char *to) { + char *from_end; + char *to_end; const char *cp; char *buf; int prefix_len, len1, len2; @@ -81,7 +58,7 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes, if (to == NULL) { - if (insert_entry (&cs->char_table, from, strlen (from), + if (insert_entry (ht, from, strlen (from), (void *) (unsigned long int) value) < 
0) lr_error (lr, _("duplicate character name `%s'"), from); @@ -111,8 +88,16 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes, if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) goto illegal_range; - from_nr = strtoul (&from[prefix_len], NULL, 10); - to_nr = strtoul (&to[prefix_len], NULL, 10); + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, 10); + if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, 10)) == ULONG_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are illegal names for range")); + return; + } if (from_nr > to_nr) { @@ -127,7 +112,7 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes, { sprintf (&buf[prefix_len], "%0*d", len1 - prefix_len, cnt); - if (insert_entry (&cs->char_table, buf, len1, + if (insert_entry (ht, buf, len1, (void *) (unsigned long int) (value + (cnt - from_nr))) < 0) lr_error (lr, _("duplicate character name `%s'"), buf); diff --git a/locale/programs/charset.h b/locale/programs/charset.h index 82c4ef0c57..db93f16306 100644 --- a/locale/programs/charset.h +++ b/locale/programs/charset.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. @@ -22,6 +22,7 @@ #include <obstack.h> +#include "repertoire.h" #include "simple-hash.h" #include "linereader.h" @@ -36,6 +37,9 @@ struct width_rule struct charset_t { + const char *repertoiremap; + struct repertoire_t *repertoire; + const char *code_set_name; int mb_cur_min; int mb_cur_max; @@ -63,14 +67,11 @@ extern int be_quiet; struct charset_t *charmap_read (const char *filename); /* Prototypes for function to insert new character. 
*/ -void charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes, +void charset_new_char (struct linereader *lr, hash_table *ht, int bytes, unsigned int value, const char *from, const char *to); -void charset_new_unicode (struct linereader *lr, struct charset_t *cs, - int bytes, unsigned int value, const char *from, - const char *to); - -unsigned int charset_find_value (const struct charset_t *__cs, - const char *__name, size_t __len); +/* Return the value stored under the given key in the hashing table. */ +unsigned int charset_find_value (const hash_table *ht, + const char *name, size_t len); #endif /* charset.h */ diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index 57b97767af..a92ff1154a 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -212,7 +212,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset) void *ptmp; unsigned int value = 0; - wch = charset_find_value (charset, patch->token, toklen); + wch = charset_find_value (&charset->char_table, patch->token, toklen); if (wch != ILLEGAL_CHAR_VALUE) { element_t *runp; @@ -1054,7 +1054,8 @@ collate_element_to (struct linereader *lr, struct localedef_t *locale, collate->combine_token = NULL; } - value = charset_find_value (charset, code->val.str.start, code->val.str.len); + value = charset_find_value (&charset->char_table, code->val.str.start, + code->val.str.len); if ((wchar_t) value != ILLEGAL_CHAR_VALUE) { lr_error (lr, _("symbol for multicharacter collating element " @@ -1181,7 +1182,8 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale, wchar_t value; void *not_used; - value = charset_find_value (charset, code->val.str.start, code->val.str.len); + value = charset_find_value (&charset->char_table, code->val.str.start, + code->val.str.len); if (value != ILLEGAL_CHAR_VALUE) { lr_error (lr, _("symbol for multicharacter collating element " @@ -1268,7 +1270,7 @@ collate_order_elem (struct linereader *lr, 
struct localedef_t *locale, { case tok_bsymbol: /* We have a string to find in one of the three hashing tables. */ - value = charset_find_value (charset, code->val.str.start, + value = charset_find_value (&charset->char_table, code->val.str.start, code->val.str.len); if (value != ILLEGAL_CHAR_VALUE) { @@ -1533,7 +1535,8 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale, assert (code->tok == tok_bsymbol); - value = charset_find_value (charset, code->val.str.start, code->val.str.len); + value = charset_find_value (&charset->char_table, code->val.str.start, + code->val.str.len); if (value != ILLEGAL_CHAR_VALUE) { element_t *runp; @@ -1706,7 +1709,8 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale, return -1; } - wch = charset_find_value (charset, startp, putp - startp); + wch = charset_find_value (&charset->char_table, startp, + putp - startp); if (wch != ILLEGAL_CHAR_VALUE) { element_t *pelem; diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index f2f32cc214..2f9a9a2caf 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -308,7 +308,7 @@ character %s'%s' in class `%s' must not be in class `%s'"), } /* ... and now test <SP> as a special case. 
*/ - space_value = charset_find_value (charset, "SP", 2); + space_value = charset_find_value (&charset->char_table, "SP", 2); if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -634,7 +634,8 @@ ctype_class_from (struct linereader *lr, struct localedef_t *locale, struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; unsigned int value; - value = charset_find_value (charset, code->val.str.start, code->val.str.len); + value = charset_find_value (&charset->char_table, code->val.str.start, + code->val.str.len); ctype->last_class_char = value; @@ -656,7 +657,8 @@ ctype_class_to (struct linereader *lr, struct localedef_t *locale, struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; unsigned int value, cnt; - value = charset_find_value (charset, code->val.str.start, code->val.str.len); + value = charset_find_value (&charset->char_table, code->val.str.start, + code->val.str.len); /* In the LC_CTYPE category it is no error when a character is not found. This has to be ignored silently. 
*/ @@ -750,7 +752,8 @@ ctype_map_from (struct linereader *lr, struct localedef_t *locale, struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; unsigned int value; - value = charset_find_value (charset, code->val.str.start, code->val.str.len); + value = charset_find_value (&charset->char_table, code->val.str.start, + code->val.str.len); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) /* In the LC_CTYPE category it is no error when a character is @@ -770,7 +773,8 @@ ctype_map_to (struct linereader *lr, struct localedef_t *locale, struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; unsigned int value; - value = charset_find_value (charset, code->val.str.start, code->val.str.len); + value = charset_find_value (&charset->char_table, code->val.str.start, + code->val.str.len); if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE || (wchar_t) value == ILLEGAL_CHAR_VALUE) @@ -948,7 +952,7 @@ set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset) unsigned int value; tmp[0] = ch; - value = charset_find_value (charset, tmp, 1); + value = charset_find_value (&charset->char_table, tmp, 1); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1013,7 +1017,7 @@ character `%s' not defined while needed as default value"), { unsigned int value; - value = charset_find_value (charset, "space", 5); + value = charset_find_value (&charset->char_table, "space", 5); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1024,7 +1028,7 @@ character `%s' not defined while needed as default value"), else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (charset, "form-feed", 9); + value = charset_find_value (&charset->char_table, "form-feed", 9); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1035,7 +1039,7 @@ character `%s' not defined while needed as default value"), else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value 
(charset, "newline", 7); + value = charset_find_value (&charset->char_table, "newline", 7); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1046,7 +1050,7 @@ character `%s' not defined while needed as default value"), else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (charset, "carriage-return", 15); + value = charset_find_value (&charset->char_table, "carriage-return", 15); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1057,7 +1061,7 @@ character `%s' not defined while needed as default value"), else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (charset, "tab", 3); + value = charset_find_value (&charset->char_table, "tab", 3); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1068,7 +1072,7 @@ character `%s' not defined while needed as default value"), else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (charset, "vertical-tab", 12); + value = charset_find_value (&charset->char_table, "vertical-tab", 12); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1097,7 +1101,7 @@ character `%s' not defined while needed as default value"), { unsigned int value; - value = charset_find_value (charset, "space", 5); + value = charset_find_value (&charset->char_table, "space", 5); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1108,7 +1112,7 @@ character `%s' not defined while needed as default value"), else ELEM (ctype, class_collection, , value) |= BIT (tok_blank); - value = charset_find_value (charset, "tab", 3); + value = charset_find_value (&charset->char_table, "tab", 3); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1149,7 +1153,7 @@ character `%s' not defined while needed as default value"), if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_print); - space = charset_find_value (charset, "space", 5); + space = 
charset_find_value (&charset->char_table, "space", 5); if (space == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1178,7 +1182,7 @@ character `%s' not defined while needed as default value"), tmp[1] = (char) ch; - value_from = charset_find_value (charset, &tmp[1], 1); + value_from = charset_find_value (&charset->char_table, &tmp[1], 1); if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE) { if (!be_quiet) @@ -1190,7 +1194,7 @@ character `%s' not defined while needed as default value"), /* This conversion is implementation defined. */ tmp[1] = (char) (ch + ('A' - 'a')); - value_to = charset_find_value (charset, &tmp[1], 1); + value_to = charset_find_value (&charset->char_table, &tmp[1], 1); if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE) { if (!be_quiet) diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c index 6692164b60..4406e1ab72 100644 --- a/locale/programs/linereader.c +++ b/locale/programs/linereader.c @@ -524,7 +524,7 @@ get_string (struct linereader *lr, const struct charset_t *charset) if (lr->translate_strings) { - value = charset_find_value (charset, &buf[startidx], + value = charset_find_value (&charset->char_table, &buf[startidx], bufact - startidx); if ((wchar_t) value == ILLEGAL_CHAR_VALUE) illegal_string = 1; |
