diff options
| author | Ulrich Drepper <drepper@redhat.com> | 1999-12-13 07:40:47 +0000 |
|---|---|---|
| committer | Ulrich Drepper <drepper@redhat.com> | 1999-12-13 07:40:47 +0000 |
| commit | 440a52ea7b427b7a5668c77283825cae20d7fc3c (patch) | |
| tree | 770813c20cccd136ec929af19dc1a28f1d752036 | |
| parent | d3e7b47c904c27a5ec25bb8afd2c37c6cdc5baa7 (diff) | |
| download | glibc-440a52ea7b427b7a5668c77283825cae20d7fc3c.tar.xz glibc-440a52ea7b427b7a5668c77283825cae20d7fc3c.zip | |
Update.
1999-12-12 Ulrich Drepper <drepper@cygnus.com>
* locale/programs/ld-collate.c (collate_read): Make symbolic
ellipsis work. Remove old and unused code.
| -rw-r--r-- | ChangeLog | 5 | ||||
| -rw-r--r-- | locale/programs/ld-collate.c | 2610 |
2 files changed, 393 insertions, 2222 deletions
@@ -1,3 +1,8 @@ +1999-12-12 Ulrich Drepper <drepper@cygnus.com> + + * locale/programs/ld-collate.c (collate_read): Make symbolic + ellipsis work. Remove old and unused code. + 1999-12-11 Ulrich Drepper <drepper@cygnus.com> * locale/programs/ld-collate.c (collate_read): Implement diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index e1c4d14f9e..42fd601064 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -21,8 +21,10 @@ # include <config.h> #endif +#include <errno.h> #include <error.h> #include <stdlib.h> +#include <wchar.h> #include "charmap.h" #include "localeinfo.h" @@ -67,6 +69,8 @@ struct element_list_t /* Data type for collating element. */ struct element_t { + const char *name; + const char *mbs; const uint32_t *wcs; int order; @@ -85,6 +89,11 @@ struct element_t struct element_t *next; }; +/* Special element value. */ +#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1) +#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2) +#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3) + /* Data type for collating symbol. */ struct symbol_t { @@ -124,8 +133,8 @@ struct locale_collate_t /* This is the cursor for `reorder_after' insertions. */ struct element_t *cursor; - /* Remember whether last weight was an ellipsis. */ - int was_ellipsis; + /* This value is used when handling ellipsis. */ + struct element_t ellipsis_weight; /* Known collating elements. */ hash_table elem_table; @@ -168,14 +177,25 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string, static struct element_t * new_element (struct locale_collate_t *collate, const char *mbs, - size_t len, const uint32_t *wcs) + const uint32_t *wcs, const char *name, size_t namelen) { struct element_t *newp; newp = (struct element_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); - newp->mbs = obstack_copy0 (&collate->mempool, mbs, len); - newp->wcs = wcs; + newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool, + name, namelen); + newp->mbs = mbs; + if (wcs != NULL) + { + size_t nwcs = wcslen ((wchar_t *) wcs) + 1; + uint32_t zero = 0; + obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t)); + obstack_grow (&collate->mempool, &zero, sizeof (uint32_t)); + newp->wcs = (uint32_t *) obstack_finish (&collate->mempool); + } + else + newp->wcs = NULL; newp->order = 0; /* Will be allocated later. */ @@ -437,14 +457,14 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate, result = sym->order; if (result == NULL) - result = sym->order = new_element (collate, str, len, NULL); + result = sym->order = new_element (collate, NULL, NULL, NULL, 0); } else if (find_entry (&collate->elem_table, str, len, (void **) &result) != 0) { - /* It's also no collation element. So it is an element defined - later. */ - result = new_element (collate, str, len, wcstr); + /* It's also no collation element. So it is an character + element defined later. */ + result = new_element (collate, NULL, NULL, str, len); if (result != NULL) /* Insert it into the sequence table. */ insert_entry (&collate->seq_table, str, len, result); @@ -456,9 +476,20 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate, static void +unlink_element (struct locale_collate_t *collate) +{ + if (collate->cursor->next != NULL) + collate->cursor->next->last = collate->cursor->last; + if (collate->cursor->last != NULL) + collate->cursor->last->next = collate->cursor->next; + collate->cursor = collate->cursor->last; +} + + +static void insert_weights (struct linereader *ldfile, struct element_t *elem, struct charmap_t *charmap, struct repertoire_t *repertoire, - struct locale_collate_t *collate) + struct locale_collate_t *collate, enum token_t ellipsis) { int weight_cnt; struct token *arg; @@ -494,7 +525,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, elem->weights[weight_cnt].w = (struct element_t **) obstack_alloc (&collate->mempool, sizeof (struct element_t *)); elem->weights[weight_cnt].w[0] = NULL; - elem->weights[weight_cnt].cnt = 0; + elem->weights[weight_cnt].cnt = 1; } else if (arg->tok == tok_bsymbol) { @@ -583,6 +614,24 @@ insert_weights (struct linereader *ldfile, struct element_t *elem, /* We don't need the string anymore. */ free (arg->val.str.startmb); } + else if (ellipsis != tok_none + && (arg->tok == tok_ellipsis2 + || arg->tok == tok_ellipsis3 + || arg->tok == tok_ellipsis4)) + { + /* It must be the same ellipsis as used in the initial column. */ + if (arg->tok != ellipsis) + lr_error (ldfile, _("\ +%s: weights must use the same ellipsis symbol as the name"), + "LC_COLLATE"); + + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } else { syntax: @@ -644,14 +693,20 @@ insert_value (struct linereader *ldfile, struct token *arg, uint32_t wc; struct element_t *elem = NULL; - /* First determine the wide character. There must be such a value, - otherwise we ignore it (if it is no collatio symbol or element). */ - wc = repertoire_find_value (repertoire, arg->val.str.startmb, - arg->val.str.lenmb); - /* Try to find the character in the charmap. */ seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb); + /* Determine the wide character. */ + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, arg->val.str.startmb, + arg->val.str.lenmb); + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) { /* It's no character, so look through the collation elements and @@ -666,9 +721,7 @@ insert_value (struct linereader *ldfile, struct token *arg, elem = sym->order; if (elem == NULL) - elem = sym->order = new_element (collate, arg->val.str.startmb, - arg->val.str.lenmb, - arg->val.str.startwc); + elem = sym->order = new_element (collate, NULL, NULL, NULL, 0); } else if (find_entry (&collate->elem_table, arg->val.str.startmb, arg->val.str.lenmb, (void **) &elem) != 0) @@ -684,16 +737,17 @@ insert_value (struct linereader *ldfile, struct token *arg, if (find_entry (&collate->seq_table, arg->val.str.startmb, arg->val.str.lenmb, (void **) &elem) != 0) { + uint32_t wcs[2] = { wc, 0 }; + /* We have to allocate an entry. */ - elem = new_element (collate, arg->val.str.startmb, - arg->val.str.lenmb, - arg->val.str.startwc); + elem = new_element (collate, seq != NULL ? seq->bytes : NULL, + wcs, arg->val.str.startmb, arg->val.str.lenmb); /* And add it to the table. */ if (insert_entry (&collate->seq_table, arg->val.str.startmb, arg->val.str.lenmb, elem) != 0) /* This cannot happen. */ - abort (); + assert (! "Internal error"); } } @@ -708,7 +762,229 @@ insert_value (struct linereader *ldfile, struct token *arg, return; } - insert_weights (ldfile, elem, charmap, repertoire, collate); + insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none); +} + + +static void +handle_ellipsis (struct linereader *ldfile, struct token *arg, + enum token_t ellipsis, struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct locale_collate_t *collate) +{ + struct element_t *startp; + struct element_t *endp; + + /* Unlink the entry added for the ellipsis. */ + unlink_element (collate); + startp = collate->cursor; + + /* Process and add the end-entry. */ + if (arg != NULL) + insert_value (ldfile, arg, charmap, repertoire, collate); + + /* Reset the cursor. */ + collate->cursor = startp; + + /* Now we have to handle many different situations: + - we have to distinguish between the three different ellipsis forms + - the is the ellipsis at the beginning, in the middle, or at the end. + */ + endp = collate->cursor->next; + assert (arg == NULL || endp != NULL); + + /* Both, the start and the end symbol, must stand for characters. */ + if ((startp == NULL || startp->name == NULL) + || (endp == NULL || endp->name == NULL)) + { + lr_error (ldfile, _("\ +%s: the start end the end symbol of a range must stand for characters"), + "LC_COLLATE"); + return; + } + + if (ellipsis == tok_ellipsis3) + { + /* XXX */ + } + else + { + /* For symbolic range we naturally must have a beginning and an + end specified by the user. */ + if (startp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not directly follow `order_start'"), + "LC_COLLATE"); + else if (endp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not be direct followed by `order_end'"), + "LC_COLLATE"); + else + { + /* Determine the range. To do so we have to determine the + common prefix of the both names and then the numeric + values of both ends. */ + size_t lenfrom = strlen (startp->name); + size_t lento = strlen (endp->name); + char buf[lento + 1]; + int preflen = 0; + long int from; + long int to; + char *cp; + int base = ellipsis == tok_ellipsis2 ? 16 : 10; + + if (lenfrom != lento) + { + invalid_range: + lr_error (ldfile, _("\ +`%s' and `%.*s' are no valid names for symbolic range"), + startp->name, lento, endp->name); + return; + } + + while (startp->name[preflen] == endp->name[preflen]) + if (startp->name[preflen] == '\0') + /* Nothing to be done. The start and end point are identical + and while inserting the end point we have already given + the user an error message. */ + return; + else + ++preflen; + + errno = 0; + from = strtol (startp->name + preflen, &cp, base); + if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + errno = 0; + to = strtol (endp->name + preflen, &cp, base); + if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + /* Copy the prefix. */ + memcpy (buf, startp->name, preflen); + + /* Loop over all values. */ + for (++from; from < to; ++from) + { + struct element_t *elem = NULL; + struct charseq *seq; + uint32_t wc; + int cnt; + + /* Generate the the name. */ + sprintf (buf + preflen, base == 10 ? "%d" : "%x", from); + + /* Look whether this name is already defined. */ + if (find_entry (&collate->seq_table, arg->val.str.startmb, + arg->val.str.lenmb, (void **) &elem) == 0) + { + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +%s: order for `%.*s' already defined at %s:%Z"), + "LC_COLLATE", lenfrom, buf, + elem->file, elem->line); + continue; + } + + if (elem->name == NULL) + { + lr_error (ldfile, _("%s: `%s' must be a charater"), + "LC_COLLATE", buf); + continue; + } + } + + if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL)) + { + /* Search for a character of this name. */ + seq = charmap_find_value (charmap, buf, lenfrom); + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, buf, lenfrom); + + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + /* We don't know anything about a character with this + name. XXX Should we warn? */ + continue; + + if (elem == NULL) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL ? seq->bytes : NULL, + wc == ILLEGAL_CHAR_VALUE + ? NULL : wcs, + buf, lenfrom); + } + else + { + /* Update the element. */ + if (seq != NULL) + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + + if (wc != ILLEGAL_CHAR_VALUE) + { + uint32_t zero = 0; + + obstack_grow (&collate->mempool, + &wc, sizeof (uint32_t)); + obstack_grow (&collate->mempool, + &zero, sizeof (uint32_t)); + elem->wcs = obstack_finish (&collate->mempool); + } + } + + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + } + + /* Enqueue the new element. */ + elem->last = collate->cursor; + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + collate->cursor = elem; + + /* Now add the weights. They come from the `ellipsis_weights' + member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simly use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } + } + } } @@ -768,7 +1044,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result, struct token *arg = NULL; enum token_t nowtok; int state = 0; - int was_ellipsis = 0; + enum token_t was_ellipsis = tok_none; struct localedef_t *copy_locale = NULL; /* Get the repertoire we have to use. */ @@ -981,10 +1257,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result, if (insert_entry (&collate->elem_table, symbol, symbol_len, new_element (collate, - arg->val.str.startmb, - arg->val.str.lenmb, - arg->val.str.startwc)) - < 0) + NULL, NULL, NULL, 0)) < 0) lr_error (ldfile, _("\ error while adding collating element")); } @@ -1244,6 +1517,12 @@ error while adding equivalent collating symbol")); if (state != 1) goto err_label; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis) + /* XXX */ + abort (); + state = 2; lr_ignore_rest (ldfile, 1); break; @@ -1257,7 +1536,18 @@ error while adding equivalent collating symbol")); break; } - if (state != 2 && state != 3) + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis) + /* XXX */ + abort (); + } + else if (state != 2 && state != 3) goto err_label; state = 3; @@ -1313,7 +1603,24 @@ error while adding equivalent collating symbol")); break; } - if (state != 2 && state != 4) + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis) + /* XXX */ + abort (); + } + else if (state == 3) + { + error (0, 0, _("%s: missing `reorder-end' keyword"), + "LC_COLLATE"); + state = 4; + } + else if (state != 2 && state != 4) goto err_label; state = 5; @@ -1490,6 +1797,23 @@ error while adding equivalent collating symbol")); read_directions (ldfile, arg, charmap, repertoire, collate); } + break; + } + else if (was_ellipsis != tok_none) + { + /* Using the information in the `ellipsis_weight' + element and this and the last value we have to handle + the ellipsis now. */ + assert (state == 1); + + handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire, + collate); + + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; + + /* And don't add the value a second time. */ + break; } /* Now insert in the new place. */ @@ -1508,37 +1832,48 @@ error while adding equivalent collating symbol")); if (state != 1) goto err_label; + if (was_ellipsis != tok_none) + { + lr_error (ldfile, + _("%s: cannot have `%s' as end of ellipsis range"), + "LC_COLLATE", "UNDEFINED"); + + unlink_element (collate); + was_ellipsis = tok_none; + } + /* See whether UNDEFINED already appeared somewhere. */ if (collate->undefined.next != NULL || (collate->cursor != NULL && collate->undefined.next == collate->cursor)) { - lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"), - 9, "UNDEFINED", collate->undefined.file, + lr_error (ldfile, + _("%s: order for `%.*s' already defined at %s:%Z"), + "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file, collate->undefined.line); lr_ignore_rest (ldfile, 0); } else /* Parse the weights. */ insert_weights (ldfile, &collate->undefined, charmap, - repertoire, collate); + repertoire, collate, tok_none); break; + case tok_ellipsis2: case tok_ellipsis3: - /* Ignore the rest of the line if we don't need the input of - this line. */ - if (ignore_content) - { - lr_ignore_rest (ldfile, 0); - break; - } + case tok_ellipsis4: + /* This is the symbolic (decimal or hexadecimal) or absolute + ellipsis. */ + if (was_ellipsis != tok_none) + goto err_label; if (state != 1 && state != 3) goto err_label; - was_ellipsis = 1; - /* XXX Read the remainder of the line and remember what are - the weights. */ + was_ellipsis = nowtok; + + insert_weights (ldfile, &collate->ellipsis_weight, charmap, + repertoire, collate, nowtok); break; case tok_end: @@ -1552,8 +1887,15 @@ error while adding equivalent collating symbol")); _("%s: empty category description not allowed"), "LC_COLLATE"); else if (state == 1) - lr_error (ldfile, _("%s: missing `order_end' keyword"), - "LC_COLLATE"); + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + + /* Handle ellipsis at end of list. */ + if (was_ellipsis) + /* XXX */ + abort (); + } else if (state == 3) error (0, 0, _("%s: missing `reorder-end' keyword"), "LC_COLLATE"); @@ -1585,2179 +1927,3 @@ error while adding equivalent collating symbol")); /* When we come here we reached the end of the file. */ lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); } - - -#if 0 - -/* What kind of symbols get defined? */ -enum coll_symbol -{ - undefined, - ellipsis, - character, - element, - symbol -}; - - -typedef struct patch_t -{ - const char *fname; - size_t lineno; - const char *token; - union - { - unsigned int *pos; - size_t idx; - } where; - struct patch_t *next; -} patch_t; - - -typedef struct element_t -{ - const char *namemb; - const uint32_t *namewc; - unsigned int this_weight; - - struct element_t *next; - - unsigned int *ordering; - size_t ordering_len; -} element_t; - - -/* The real definition of the struct for the LC_COLLATE locale. */ -struct locale_collate_t -{ - /* Collate symbol table. Simple mapping to number. */ - hash_table symbols; - - /* The collation elements. */ - hash_table elements; - struct obstack element_mem; - - /* The result tables. */ - hash_table resultmb; - hash_table resultwc; - - /* Sorting rules given in order_start line. */ - uint32_t nrules; - enum coll_sort_rule *rules; - - /* Used while recognizing symbol composed of multiple tokens - (collating-element). */ - const char *combine_token; - size_t combine_token_len; - - /* How many sorting order specifications so far. */ - unsigned int order_cnt; - - /* Was lastline ellipsis? */ - int was_ellipsis; - /* Value of last entry if was character. */ - uint32_t last_char; - /* Current element. */ - element_t *current_element; - /* What kind of symbol is current element. */ - enum coll_symbol kind; - - /* Patch lists. */ - patch_t *current_patch; - patch_t *all_patches; - - /* Room for the UNDEFINED information. */ - element_t undefined; - unsigned int undefined_len; - - /* Script information. */ - const char **scripts; - unsigned int nscripts; -}; - - -/* Be verbose? Defined in localedef.c. */ -extern int verbose; - - - -#define obstack_chunk_alloc malloc -#define obstack_chunk_free free - - -/* Prototypes for local functions. */ -static void collate_startup (struct linereader *ldfil |
