diff options
| -rw-r--r-- | ChangeLog | 71 | ||||
| -rw-r--r-- | ctype/ctype-info.c | 3 | ||||
| -rw-r--r-- | locale/C-ctype.c | 2 | ||||
| -rw-r--r-- | locale/categories.def | 2 | ||||
| -rw-r--r-- | locale/langinfo.h | 2 | ||||
| -rw-r--r-- | locale/lc-ctype.c | 30 | ||||
| -rw-r--r-- | locale/programs/ld-ctype.c | 1195 | ||||
| -rw-r--r-- | locale/programs/localedef.c | 8 | ||||
| -rw-r--r-- | locale/programs/localedef.h | 1 | ||||
| -rw-r--r-- | localedata/ChangeLog | 3 | ||||
| -rw-r--r-- | localedata/tst-wctype.c | 104 | ||||
| -rw-r--r-- | localedata/tst-wctype.input | 4 | ||||
| -rw-r--r-- | wcsmbs/wcwidth.h | 42 | ||||
| -rw-r--r-- | wctype/iswctype.c | 31 | ||||
| -rw-r--r-- | wctype/iswctype_l.c | 37 | ||||
| -rw-r--r-- | wctype/towctrans.c | 26 | ||||
| -rw-r--r-- | wctype/towctrans_l.c | 29 | ||||
| -rw-r--r-- | wctype/wcextra.c | 29 | ||||
| -rw-r--r-- | wctype/wcextra_l.c | 36 | ||||
| -rw-r--r-- | wctype/wcfuncs.c | 94 | ||||
| -rw-r--r-- | wctype/wcfuncs_l.c | 116 | ||||
| -rw-r--r-- | wctype/wchar-lookup.h | 139 | ||||
| -rw-r--r-- | wctype/wctrans.c | 25 | ||||
| -rw-r--r-- | wctype/wctype.c | 21 | ||||
| -rw-r--r-- | wctype/wctype.h | 42 | ||||
| -rw-r--r-- | wctype/wctype_l.c | 21 |
26 files changed, 1769 insertions, 344 deletions
@@ -1,3 +1,74 @@ +2000-07-23 Bruno Haible <haible@clisp.cons.org> + + * wctype/wchar-lookup.h: New file. + * wctype/iswctype.c: Include "wchar-lookup.h". + (__iswctype): Support alternate locale format with 3-level tables. + * wctype/iswctype_l.c (__iswctype_l): Likewise. + * wctype/towctrans.c (__towctrans): Likewise. + * wctype/towctrans_l.c (__towctrans_l): Likewise. + * wctype/wcfuncs.c: Include "wchar-lookup.h". + (__ctype32_wctype, __ctype32_wctrans): Declare external. + (__iswalnum, __iswalpha, __iswcntrl, __iswdigit, __iswlower, + __iswgraph, __iswprint, __iswpunct, __iswspace, __iswupper, + __iswxdigit, towlower, towupper): Support alternate locale format + with 3-level tables. + * wctype/wcextra.c (iswblank): Likewise. + * wctype/wcfuncs_l.c: Include "wchar-lookup.h". + (__iswalnum_l, __iswalpha_l, __iswcntrl_l, __iswdigit_l, __iswlower_l, + __iswgraph_l, __iswprint_l, __iswpunct_l, __iswspace_l, __iswupper_l, + __iswxdigit_l, __towlower_l, __towupper_l): Support alternate locale + format with 3-level tables. + * wctype/wcextra_l.c (__iswblank_l): Likewise. + * wctype/wctype.c (__wctype): Likewise. In the alternate locale + format, return a 3-level table pointer. + * wctype/wctype_l.c (__wctype_l): Likewise. + * wctype/wctrans.c (wctrans): Likewise. + * wctype/wctype.h (__ISwupper, __ISwlower, __ISwalpha, __ISwdigit, + __ISwxdigit, __ISwspace, __ISwprint, __ISwgraph, __ISwblank, + __ISwcntrl, __ISwpunct, __ISwalnum): New enum values. + (iswctype): Remove macro definition. + * wcsmbs/wcwidth.h: Include "wchar-lookup.h". + (internal_wcwidth): Support alternate locale format with 3-level + tables. + * locale/langinfo.h (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET): + New nl_items. + * locale/categories.def (_NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET): + Define them as being type "word". + * locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializers for them. + * ctype/ctype-info.c (__ctype32_wctype, __ctype32_wctrans, + __ctype32_width): New exported variables. + * locale/lc-ctype.c (_nl_postload_ctype): Initialize them in the + alternate locale format. Don't initialize __ctype_names and + __ctype_width in the alternate locale format. + * locale/programs/localedef.h (oldstyle_tables): New declaration. + * locale/programs/localedef.c (oldstyle_tables): New variable. + (OPT_OLDSTYLE): New macro. + (options): Add --old-style option. + (parse_opt): Handle --old-style option. + * locale/programs/ld-ctype.c (locale_ctype_t): Add class_offset, + map_offset, class_3level, map_3level, width_3level members. + (ctype_output): Support for alternate locale format: Computation of + nelems changes. _NL_CTYPE_TOUPPER32, _NL_CTYPE_TOLOWER32 and + _NL_CTYPE_CLASS32 only 256 characters. _NL_CTYPE_NAMES empty. + New fields _NL_CTYPE_CLASS_OFFSET, _NL_CTYPE_MAP_OFFSET. Field + _NL_CTYPE_WIDTH now contains the three-level table. Extra elems + now contain both class and map tables. + (struct wctype_table): New type. + (wctype_table_init, wctype_table_add, wctype_table_finalize): New + functions. + (struct wcwidth_table): New type. + (wcwidth_table_init, wcwidth_table_add, wcwidth_table_finalize): New + functions. + (struct wctrans_table): New type. + (wctrans_table_init, wctrans_table_add, wctrans_table_finalize): New + functions. + (allocate_arrays): Support for alternate locale format: Set + plane_size and plane_cnt to 0. Restrict ctype->ctype32_b to the first + 256 characters. Compute ctype->class_3level. Restrict ctype->map32[idx] + to the first 256 characters. Compute ctype->map_3level. Set + ctype->class_offset and ctype->map_offset. Compute ctype->width_3level + instead of ctype->width. + 2000-07-24 Ulrich Drepper <drepper@redhat.com> * libio/iogetwline.c (_IO_getwline_info): Use wide character diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c index 81d5d6a2fc..18d88d9534 100644 --- a/ctype/ctype-info.c +++ b/ctype/ctype-info.c @@ -37,3 +37,6 @@ const __uint32_t *__ctype32_tolower = b (__uint32_t, tolower, 128); const __uint32_t *__ctype32_toupper = b (__uint32_t, toupper, 128); const __uint32_t *__ctype_names = b (__uint32_t, names, 0); const unsigned char *__ctype_width = b (unsigned char, width, 0); +const char *__ctype32_wctype[12]; +const char *__ctype32_wctrans[2]; +const char *__ctype32_width; diff --git a/locale/C-ctype.c b/locale/C-ctype.c index fe1e8ac1fb..66f2d21598 100644 --- a/locale/C-ctype.c +++ b/locale/C-ctype.c @@ -380,6 +380,8 @@ const struct locale_data _nl_C_LC_CTYPE = { string: "ANSI_X3.4-1968" }, { string: (const char *) &_nl_C_LC_CTYPE_toupper[128] }, { string: (const char *) &_nl_C_LC_CTYPE_tolower[128] }, + { word: 0 }, + { word: 0 }, { word: 1 }, { string: "0" }, { string: "1" }, diff --git a/locale/categories.def b/locale/categories.def index a8fa30e575..95873d0036 100644 --- a/locale/categories.def +++ b/locale/categories.def @@ -84,6 +84,8 @@ DEFINE_CATEGORY DEFINE_ELEMENT (_NL_CTYPE_CODESET_NAME, "charmap", std, string) DEFINE_ELEMENT (_NL_CTYPE_TOUPPER32, "ctype-toupper32", std, string) DEFINE_ELEMENT (_NL_CTYPE_TOLOWER32, "ctype-tolower32", std, string) + DEFINE_ELEMENT (_NL_CTYPE_CLASS_OFFSET, "ctype-class-offset", std, word) + DEFINE_ELEMENT (_NL_CTYPE_MAP_OFFSET, "ctype-map-offset", std, word) DEFINE_ELEMENT (_NL_CTYPE_INDIGITS_MB_LEN, "ctype-indigits_mb-len", std, word) DEFINE_ELEMENT (_NL_CTYPE_INDIGITS0_MB, "ctype-indigits0_mb", std, string) DEFINE_ELEMENT (_NL_CTYPE_INDIGITS1_MB, "ctype-indigits1_mb", std, string) diff --git a/locale/langinfo.h b/locale/langinfo.h index 69d7292765..0062cda908 100644 --- a/locale/langinfo.h +++ b/locale/langinfo.h @@ -274,6 +274,8 @@ enum #define CODESET CODESET _NL_CTYPE_TOUPPER32, _NL_CTYPE_TOLOWER32, + _NL_CTYPE_CLASS_OFFSET, + _NL_CTYPE_MAP_OFFSET, _NL_CTYPE_INDIGITS_MB_LEN, _NL_CTYPE_INDIGITS0_MB, _NL_CTYPE_INDIGITS1_MB, diff --git a/locale/lc-ctype.c b/locale/lc-ctype.c index 1a35e245cb..4b40b8a22d 100644 --- a/locale/lc-ctype.c +++ b/locale/lc-ctype.c @@ -1,5 +1,5 @@ /* Define current locale data for LC_CTYPE category. - Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -45,6 +45,9 @@ _nl_postload_ctype (void) extern const unsigned char *__ctype_width; extern const uint32_t *__ctype32_toupper; extern const uint32_t *__ctype32_tolower; + extern const char *__ctype32_wctype[12]; + extern const char *__ctype32_wctrans[2]; + extern const char *__ctype32_width; __ctype_b = current (uint16_t, CLASS, 128); __ctype_toupper = current (uint32_t, TOUPPER, 128); @@ -52,6 +55,27 @@ _nl_postload_ctype (void) __ctype32_b = current (uint32_t, CLASS32, 0); __ctype32_toupper = current (uint32_t, TOUPPER32, 0); __ctype32_tolower = current (uint32_t, TOLOWER32, 0); - __ctype_names = current (uint32_t, NAMES, 0); - __ctype_width = current (unsigned char, WIDTH, 0); + if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0) + { + /* Old locale format. */ + __ctype_names = current (uint32_t, NAMES, 0); + __ctype_width = current (unsigned char, WIDTH, 0); + } + else + { + /* New locale format. */ + size_t offset, cnt; + + offset = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_CLASS_OFFSET); + for (cnt = 0; cnt < 12; cnt++) + __ctype32_wctype[cnt] = + _nl_current_LC_CTYPE->values[offset + cnt].string; + + offset = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_OFFSET); + for (cnt = 0; cnt < 2; cnt++) + __ctype32_wctrans[cnt] = + _nl_current_LC_CTYPE->values[offset + cnt].string; + + __ctype32_width = current (char, WIDTH, 0); + } } diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index 5dfcec3339..14b174e9f1 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -125,6 +125,7 @@ struct locale_ctype_t size_t class_collection_max; size_t class_collection_act; uint32_t class_done; + uint32_t class_offset; struct charseq **mbdigits; size_t mbdigits_act; @@ -148,6 +149,7 @@ struct locale_ctype_t size_t map_collection_nr; size_t last_map_idx; int tomap_done[MAX_NR_CHARMAP]; + uint32_t map_offset; /* Transliteration information. */ const char *translit_copy_locale; @@ -168,9 +170,12 @@ struct locale_ctype_t uint32_t *names; uint32_t **map; uint32_t **map32; + struct iovec *class_3level; + struct iovec *map_3level; uint32_t *class_name_ptr; uint32_t *map_name_ptr; unsigned char *width; + struct iovec width_3level; uint32_t mb_cur_max; const char *codeset_name; uint32_t *translit_from_idx; @@ -834,7 +839,9 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap, static const char nulbytes[4] = { 0, 0, 0, 0 }; struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; const size_t nelems = (_NL_ITEM_INDEX (_NL_NUM_LC_CTYPE) - + (ctype->map_collection_nr - 2)); + + (oldstyle_tables + ? (ctype->map_collection_nr - 2) + : (ctype->nr_charclass + ctype->map_collection_nr))); struct iovec iov[2 + nelems + ctype->nr_charclass + ctype->map_collection_nr + 2]; struct locale_file data; @@ -893,21 +900,28 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap, CTYPE_DATA (_NL_CTYPE_TOUPPER32, ctype->map32[0], - (ctype->plane_size * ctype->plane_cnt) + (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256) * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TOLOWER32, ctype->map32[1], - (ctype->plane_size * ctype->plane_cnt) + (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256) * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_CLASS32, ctype->ctype32_b, - (ctype->plane_size * ctype->plane_cnt - * sizeof (char_class32_t))); + (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256) + * sizeof (char_class32_t)); CTYPE_DATA (_NL_CTYPE_NAMES, - ctype->names, (ctype->plane_size * ctype->plane_cnt - * sizeof (uint32_t))); + ctype->names, + (oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 0) + * sizeof (uint32_t)); + + CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET, + &ctype->class_offset, sizeof (uint32_t)); + + CTYPE_DATA (_NL_CTYPE_MAP_OFFSET, + &ctype->map_offset, sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE, &ctype->translit_idx_size, sizeof (uint32_t)); @@ -969,8 +983,12 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap, break; CTYPE_DATA (_NL_CTYPE_WIDTH, - ctype->width, - (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul); + (oldstyle_tables + ? ctype->width + : ctype->width_3level.iov_base), + (oldstyle_tables + ? (ctype->plane_size * ctype->plane_cnt + 3) & ~3ul + : ctype->width_3level.iov_len)); CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX, &ctype->mb_cur_max, sizeof (uint32_t)); @@ -1135,14 +1153,32 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap, else { /* Handle extra maps. */ - size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2; + if (oldstyle_tables) + { + size_t nr = (elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE)) + 2; - iov[2 + elem + offset].iov_base = ctype->map32[nr]; - iov[2 + elem + offset].iov_len = ((ctype->plane_size - * ctype->plane_cnt) - * sizeof (uint32_t)); + iov[2 + elem + offset].iov_base = ctype->map32[nr]; + iov[2 + elem + offset].iov_len = ((ctype->plane_size + * ctype->plane_cnt) + * sizeof (uint32_t)); - idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + } + else + { + size_t nr = elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE); + if (nr < ctype->nr_charclass) + { + iov[2 + elem + offset] = ctype->class_3level[nr]; + } + else + { + nr -= ctype->nr_charclass; + assert (nr < ctype->map_collection_nr); + iov[2 + elem + offset] = ctype->map_3level[nr]; + } + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + } } } @@ -3194,6 +3230,628 @@ no output digits defined and none of the standard names in the charmap")); } +/* Construction of sparse 3-level tables. + See wchar-lookup.h for their structure and the meaning of p and q. */ + +struct wctype_table +{ + /* Parameters. */ + unsigned int p; + unsigned int q; + /* Working representation. */ + size_t level1_alloc; + size_t level1_size; + uint32_t *level1; + size_t level2_alloc; + size_t level2_size; + uint32_t *level2; + size_t level3_alloc; + size_t level3_size; + uint32_t *level3; + /* Compressed representation. */ + size_t result_size; + char *result; +}; + +/* Initialize. Assumes t->p and t->q have already been set. */ +static inline void +wctype_table_init (struct wctype_table *t) +{ + t->level1_alloc = t->level1_size = 0; + t->level2_alloc = t->level2_size = 0; + t->level3_alloc = t->level3_size = 0; +} + +/* Add one entry. */ +static void +wctype_table_add (struct wctype_table *t, uint32_t wc) +{ + uint32_t index1 = wc >> (t->q + t->p + 5); + uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1); + uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1); + uint32_t index4 = wc & 0x1f; + size_t i, i1, i2; + + if (index1 >= t->level1_size) + { + if (index1 >= t->level1_alloc) + { + size_t alloc = 2 * t->level1_alloc; + if (alloc <= index1) + alloc = index1 + 1; + t->level1 = (t->level1_alloc > 0 + ? (uint32_t *) xrealloc ((char *) t->level1, + alloc * sizeof (uint32_t)) + : (uint32_t *) xmalloc (alloc * sizeof (uint32_t))); + t->level1_alloc = alloc; + } + while (index1 >= t->level1_size) + t->level1[t->level1_size++] = ~((uint32_t) 0); + } + + if (t->level1[index1] == ~((uint32_t) 0)) + { + if (t->level2_size == t->level2_alloc) + { + size_t alloc = 2 * t->level2_alloc + 1; + t->level2 = (t->level2_alloc > 0 + ? (uint32_t *) xrealloc ((char *) t->level2, + (alloc << t->q) * sizeof (uint32_t)) + : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t))); + t->level2_alloc = alloc; + } + i1 = t->level2_size << t->q; + i2 = (t->level2_size + 1) << t->q; + for (i = i1; i < i2; i++) + t->level2[i] = ~((uint32_t) 0); + t->level1[index1] = t->level2_size++; + } + + index2 += t->level1[index1] << t->q; + + if (t->level2[index2] == ~((uint32_t) 0)) + { + if (t->level3_size == t->level3_alloc) + { + size_t alloc = 2 * t->level3_alloc + 1; + t->level3 = (t->level3_alloc > 0 + ? (uint32_t *) xrealloc ((char *) t->level3, + (alloc << t->p) * sizeof (uint32_t)) + : (uint32_t *) xmalloc ((alloc << t->p) * sizeof (uint32_t))); + t->level3_alloc = alloc; + } + i1 = t->level3_size << t->p; + i2 = (t->level3_size + 1) << t->p; + for (i = i1; i < i2; i++) + t->level3[i] = 0; + t->level2[index2] = t->level3_size++; + } + + index3 += t->level2[index2] << t->p; + + t->level3[index3] |= (uint32_t)1 << index4; +} + +/* Finalize and shrink. */ +static void +wctype_table_finalize (struct wctype_table *t) +{ + size_t i, j, k; + uint32_t reorder3[t->level3_size]; + uint32_t reorder2[t->level2_size]; + uint32_t level1_offset, level2_offset, level3_offset; + + /* Uniquify level3 blocks. */ + k = 0; + for (j = 0; j < t->level3_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (uint32_t)) == 0) + break; + /* Relocate block j to block i. */ + reorder3[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (uint32_t)); + k++; + } + } + t->level3_size = k; + + for (i = 0; i < (t->level2_size << t->q); i++) + if (t->level2[i] != ~((uint32_t) 0)) + t->level2[i] = reorder3[t->level2[i]]; + + /* Uniquify level2 blocks. */ + k = 0; + for (j = 0; j < t->level2_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)) == 0) + break; + /* Relocate block j to block i. */ + reorder2[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)); + k++; + } + } + t->level2_size = k; + + for (i = 0; i < t->level1_size; i++) + if (t->level1[i] != ~((uint32_t) 0)) + t->level1[i] = reorder2[t->level1[i]]; + + /* Create and fill the resulting compressed representation. */ + t->result_size = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t) + + (t->level3_size << t->p) * sizeof (uint32_t); + t->result = (char *) xmalloc (t->result_size); + + level1_offset = + 5 * sizeof (uint32_t); + level2_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t); + level3_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t); + + ((uint32_t *) t->result)[0] = t->q + t->p + 5; + ((uint32_t *) t->result)[1] = t->level1_size; + ((uint32_t *) t->result)[2] = t->p + 5; + ((uint32_t *) t->result)[3] = (1 << t->q) - 1; + ((uint32_t *) t->result)[4] = (1 << t->p) - 1; + + for (i = 0; i < t->level1_size; i++) + ((uint32_t *) (t->result + level1_offset))[i] = + (t->level1[i] == ~((uint32_t) 0) + ? 0 + : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); + |
