aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-12-13 07:40:47 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-12-13 07:40:47 +0000
commit 440a52ea7b427b7a5668c77283825cae20d7fc3c (patch)
tree 770813c20cccd136ec929af19dc1a28f1d752036
parent d3e7b47c904c27a5ec25bb8afd2c37c6cdc5baa7 (diff)
download glibc-440a52ea7b427b7a5668c77283825cae20d7fc3c.tar.xz
glibc-440a52ea7b427b7a5668c77283825cae20d7fc3c.zip
Update.
1999-12-12 Ulrich Drepper <drepper@cygnus.com> * locale/programs/ld-collate.c (collate_read): Make symbolic ellipsis work. Remove old and unused code.
-rw-r--r-- ChangeLog 5
-rw-r--r-- locale/programs/ld-collate.c 2610
2 files changed, 393 insertions, 2222 deletions
diff --git a/ChangeLog b/ChangeLog
index ad946bd90b..e08445df95 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+1999-12-12 Ulrich Drepper <drepper@cygnus.com>
+
+ * locale/programs/ld-collate.c (collate_read): Make symbolic
+ ellipsis work. Remove old and unused code.
+
1999-12-11 Ulrich Drepper <drepper@cygnus.com>
* locale/programs/ld-collate.c (collate_read): Implement
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index e1c4d14f9e..42fd601064 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -21,8 +21,10 @@
# include <config.h>
#endif
+#include <errno.h>
#include <error.h>
#include <stdlib.h>
+#include <wchar.h>
#include "charmap.h"
#include "localeinfo.h"
@@ -67,6 +69,8 @@ struct element_list_t
/* Data type for collating element. */
struct element_t
{
+ const char *name;
+
const char *mbs;
const uint32_t *wcs;
int order;
@@ -85,6 +89,11 @@ struct element_t
struct element_t *next;
};
+/* Special element value. */
+#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
+#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
+#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
+
/* Data type for collating symbol. */
struct symbol_t
{
@@ -124,8 +133,8 @@ struct locale_collate_t
/* This is the cursor for `reorder_after' insertions. */
struct element_t *cursor;
- /* Remember whether last weight was an ellipsis. */
- int was_ellipsis;
+ /* This value is used when handling ellipsis. */
+ struct element_t ellipsis_weight;
/* Known collating elements. */
hash_table elem_table;
@@ -168,14 +177,25 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string,
static struct element_t *
new_element (struct locale_collate_t *collate, const char *mbs,
- size_t len, const uint32_t *wcs)
+ const uint32_t *wcs, const char *name, size_t namelen)
{
struct element_t *newp;
newp = (struct element_t *) obstack_alloc (&collate->mempool,
sizeof (*newp));
- newp->mbs = obstack_copy0 (&collate->mempool, mbs, len);
- newp->wcs = wcs;
+ newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool,
+ name, namelen);
+ newp->mbs = mbs;
+ if (wcs != NULL)
+ {
+ size_t nwcs = wcslen ((wchar_t *) wcs) + 1;
+ uint32_t zero = 0;
+ obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
+ obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
+ newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
+ }
+ else
+ newp->wcs = NULL;
newp->order = 0;
/* Will be allocated later. */
@@ -437,14 +457,14 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
result = sym->order;
if (result == NULL)
- result = sym->order = new_element (collate, str, len, NULL);
+ result = sym->order = new_element (collate, NULL, NULL, NULL, 0);
}
else if (find_entry (&collate->elem_table, str, len,
(void **) &result) != 0)
{
- /* It's also no collation element. So it is an element defined
- later. */
- result = new_element (collate, str, len, wcstr);
+ /* It's also no collation element. So it is a character
+ element defined later. */
+ result = new_element (collate, NULL, NULL, str, len);
if (result != NULL)
/* Insert it into the sequence table. */
insert_entry (&collate->seq_table, str, len, result);
@@ -456,9 +476,20 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
static void
+unlink_element (struct locale_collate_t *collate)
+{
+ if (collate->cursor->next != NULL)
+ collate->cursor->next->last = collate->cursor->last;
+ if (collate->cursor->last != NULL)
+ collate->cursor->last->next = collate->cursor->next;
+ collate->cursor = collate->cursor->last;
+}
+
+
+static void
insert_weights (struct linereader *ldfile, struct element_t *elem,
struct charmap_t *charmap, struct repertoire_t *repertoire,
- struct locale_collate_t *collate)
+ struct locale_collate_t *collate, enum token_t ellipsis)
{
int weight_cnt;
struct token *arg;
@@ -494,7 +525,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
elem->weights[weight_cnt].w = (struct element_t **)
obstack_alloc (&collate->mempool, sizeof (struct element_t *));
elem->weights[weight_cnt].w[0] = NULL;
- elem->weights[weight_cnt].cnt = 0;
+ elem->weights[weight_cnt].cnt = 1;
}
else if (arg->tok == tok_bsymbol)
{
@@ -583,6 +614,24 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
/* We don't need the string anymore. */
free (arg->val.str.startmb);
}
+ else if (ellipsis != tok_none
+ && (arg->tok == tok_ellipsis2
+ || arg->tok == tok_ellipsis3
+ || arg->tok == tok_ellipsis4))
+ {
+ /* It must be the same ellipsis as used in the initial column. */
+ if (arg->tok != ellipsis)
+ lr_error (ldfile, _("\
+%s: weights must use the same ellipsis symbol as the name"),
+ "LC_COLLATE");
+
+ /* The weight for this level is the element itself. We use the
+ ELEMENT_ELLIPSIS2 marker to indicate this. */
+ elem->weights[weight_cnt].w = (struct element_t **)
+ obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+ elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
+ elem->weights[weight_cnt].cnt = 1;
+ }
else
{
syntax:
@@ -644,14 +693,20 @@ insert_value (struct linereader *ldfile, struct token *arg,
uint32_t wc;
struct element_t *elem = NULL;
- /* First determine the wide character. There must be such a value,
- otherwise we ignore it (if it is no collatio symbol or element). */
- wc = repertoire_find_value (repertoire, arg->val.str.startmb,
- arg->val.str.lenmb);
-
/* Try to find the character in the charmap. */
seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
+ /* Determine the wide character. */
+ if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ {
+ wc = repertoire_find_value (repertoire, arg->val.str.startmb,
+ arg->val.str.lenmb);
+ if (seq != NULL)
+ seq->ucs4 = wc;
+ }
+ else
+ wc = seq->ucs4;
+
if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
{
/* It's no character, so look through the collation elements and
@@ -666,9 +721,7 @@ insert_value (struct linereader *ldfile, struct token *arg,
elem = sym->order;
if (elem == NULL)
- elem = sym->order = new_element (collate, arg->val.str.startmb,
- arg->val.str.lenmb,
- arg->val.str.startwc);
+ elem = sym->order = new_element (collate, NULL, NULL, NULL, 0);
}
else if (find_entry (&collate->elem_table, arg->val.str.startmb,
arg->val.str.lenmb, (void **) &elem) != 0)
@@ -684,16 +737,17 @@ insert_value (struct linereader *ldfile, struct token *arg,
if (find_entry (&collate->seq_table, arg->val.str.startmb,
arg->val.str.lenmb, (void **) &elem) != 0)
{
+ uint32_t wcs[2] = { wc, 0 };
+
/* We have to allocate an entry. */
- elem = new_element (collate, arg->val.str.startmb,
- arg->val.str.lenmb,
- arg->val.str.startwc);
+ elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
+ wcs, arg->val.str.startmb, arg->val.str.lenmb);
/* And add it to the table. */
if (insert_entry (&collate->seq_table, arg->val.str.startmb,
arg->val.str.lenmb, elem) != 0)
/* This cannot happen. */
- abort ();
+ assert (! "Internal error");
}
}
@@ -708,7 +762,229 @@ insert_value (struct linereader *ldfile, struct token *arg,
return;
}
- insert_weights (ldfile, elem, charmap, repertoire, collate);
+ insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
+}
+
+
+static void
+handle_ellipsis (struct linereader *ldfile, struct token *arg,
+ enum token_t ellipsis, struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct locale_collate_t *collate)
+{
+ struct element_t *startp;
+ struct element_t *endp;
+
+ /* Unlink the entry added for the ellipsis. */
+ unlink_element (collate);
+ startp = collate->cursor;
+
+ /* Process and add the end-entry. */
+ if (arg != NULL)
+ insert_value (ldfile, arg, charmap, repertoire, collate);
+
+ /* Reset the cursor. */
+ collate->cursor = startp;
+
+ /* Now we have to handle many different situations:
+ - we have to distinguish between the three different ellipsis forms
+ - whether the ellipsis is at the beginning, in the middle, or at the end.
+ */
+ endp = collate->cursor->next;
+ assert (arg == NULL || endp != NULL);
+
+ /* Both, the start and the end symbol, must stand for characters. */
+ if ((startp == NULL || startp->name == NULL)
+ || (endp == NULL || endp->name == NULL))
+ {
+ lr_error (ldfile, _("\
+%s: the start end the end symbol of a range must stand for characters"),
+ "LC_COLLATE");
+ return;
+ }
+
+ if (ellipsis == tok_ellipsis3)
+ {
+ /* XXX */
+ }
+ else
+ {
+ /* For symbolic range we naturally must have a beginning and an
+ end specified by the user. */
+ if (startp == NULL)
+ lr_error (ldfile, _("\
+%s: symbolic range ellipsis must not directly follow `order_start'"),
+ "LC_COLLATE");
+ else if (endp == NULL)
+ lr_error (ldfile, _("\
+%s: symbolic range ellipsis must not be direct followed by `order_end'"),
+ "LC_COLLATE");
+ else
+ {
+ /* Determine the range. To do so we have to determine the
+ common prefix of both names and then the numeric
+ values of both ends. */
+ size_t lenfrom = strlen (startp->name);
+ size_t lento = strlen (endp->name);
+ char buf[lento + 1];
+ int preflen = 0;
+ long int from;
+ long int to;
+ char *cp;
+ int base = ellipsis == tok_ellipsis2 ? 16 : 10;
+
+ if (lenfrom != lento)
+ {
+ invalid_range:
+ lr_error (ldfile, _("\
+`%s' and `%.*s' are no valid names for symbolic range"),
+ startp->name, lento, endp->name);
+ return;
+ }
+
+ while (startp->name[preflen] == endp->name[preflen])
+ if (startp->name[preflen] == '\0')
+ /* Nothing to be done. The start and end point are identical
+ and while inserting the end point we have already given
+ the user an error message. */
+ return;
+ else
+ ++preflen;
+
+ errno = 0;
+ from = strtol (startp->name + preflen, &cp, base);
+ if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
+ goto invalid_range;
+
+ errno = 0;
+ to = strtol (endp->name + preflen, &cp, base);
+ if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
+ goto invalid_range;
+
+ /* Copy the prefix. */
+ memcpy (buf, startp->name, preflen);
+
+ /* Loop over all values. */
+ for (++from; from < to; ++from)
+ {
+ struct element_t *elem = NULL;
+ struct charseq *seq;
+ uint32_t wc;
+ int cnt;
+
+ /* Generate the name. */
+ sprintf (buf + preflen, base == 10 ? "%d" : "%x", from);
+
+ /* Look whether this name is already defined. */
+ if (find_entry (&collate->seq_table, arg->val.str.startmb,
+ arg->val.str.lenmb, (void **) &elem) == 0)
+ {
+ if (elem->next != NULL || (collate->cursor != NULL
+ && elem->next == collate->cursor))
+ {
+ lr_error (ldfile, _("\
+%s: order for `%.*s' already defined at %s:%Z"),
+ "LC_COLLATE", lenfrom, buf,
+ elem->file, elem->line);
+ continue;
+ }
+
+ if (elem->name == NULL)
+ {
+ lr_error (ldfile, _("%s: `%s' must be a charater"),
+ "LC_COLLATE", buf);
+ continue;
+ }
+ }
+
+ if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
+ {
+ /* Search for a character of this name. */
+ seq = charmap_find_value (charmap, buf, lenfrom);
+ if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ {
+ wc = repertoire_find_value (repertoire, buf, lenfrom);
+
+ if (seq != NULL)
+ seq->ucs4 = wc;
+ }
+ else
+ wc = seq->ucs4;
+
+ if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
+ /* We don't know anything about a character with this
+ name. XXX Should we warn? */
+ continue;
+
+ if (elem == NULL)
+ {
+ uint32_t wcs[2] = { wc, 0 };
+
+ /* We have to allocate an entry. */
+ elem = new_element (collate,
+ seq != NULL ? seq->bytes : NULL,
+ wc == ILLEGAL_CHAR_VALUE
+ ? NULL : wcs,
+ buf, lenfrom);
+ }
+ else
+ {
+ /* Update the element. */
+ if (seq != NULL)
+ elem->mbs = obstack_copy0 (&collate->mempool,
+ seq->bytes, seq->nbytes);
+
+ if (wc != ILLEGAL_CHAR_VALUE)
+ {
+ uint32_t zero = 0;
+
+ obstack_grow (&collate->mempool,
+ &wc, sizeof (uint32_t));
+ obstack_grow (&collate->mempool,
+ &zero, sizeof (uint32_t));
+ elem->wcs = obstack_finish (&collate->mempool);
+ }
+ }
+
+ elem->file = ldfile->fname;
+ elem->line = ldfile->lineno;
+ }
+
+ /* Enqueue the new element. */
+ elem->last = collate->cursor;
+ elem->next = collate->cursor->next;
+ elem->last->next = elem;
+ if (elem->next != NULL)
+ elem->next->last = elem;
+ collate->cursor = elem;
+
+ /* Now add the weights. They come from the `ellipsis_weight'
+ member of `collate'. */
+ elem->weights = (struct element_list_t *)
+ obstack_alloc (&collate->mempool,
+ nrules * sizeof (struct element_list_t));
+ for (cnt = 0; cnt < nrules; ++cnt)
+ if (collate->ellipsis_weight.weights[cnt].cnt == 1
+ && (collate->ellipsis_weight.weights[cnt].w[0]
+ == ELEMENT_ELLIPSIS2))
+ {
+ elem->weights[cnt].w = (struct element_t **)
+ obstack_alloc (&collate->mempool,
+ sizeof (struct element_t *));
+ elem->weights[cnt].w[0] = elem;
+ elem->weights[cnt].cnt = 1;
+ }
+ else
+ {
+ /* Simply use the weight from `ellipsis_weight'. */
+ elem->weights[cnt].w =
+ collate->ellipsis_weight.weights[cnt].w;
+ elem->weights[cnt].cnt =
+ collate->ellipsis_weight.weights[cnt].cnt;
+ }
+ }
+ }
+ }
}
@@ -768,7 +1044,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
struct token *arg = NULL;
enum token_t nowtok;
int state = 0;
- int was_ellipsis = 0;
+ enum token_t was_ellipsis = tok_none;
struct localedef_t *copy_locale = NULL;
/* Get the repertoire we have to use. */
@@ -981,10 +1257,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
if (insert_entry (&collate->elem_table,
symbol, symbol_len,
new_element (collate,
- arg->val.str.startmb,
- arg->val.str.lenmb,
- arg->val.str.startwc))
- < 0)
+ NULL, NULL, NULL, 0)) < 0)
lr_error (ldfile, _("\
error while adding collating element"));
}
@@ -1244,6 +1517,12 @@ error while adding equivalent collating symbol"));
if (state != 1)
goto err_label;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+
state = 2;
lr_ignore_rest (ldfile, 1);
break;
@@ -1257,7 +1536,18 @@ error while adding equivalent collating symbol"));
break;
}
- if (state != 2 && state != 3)
+ if (state == 1)
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+ state = 2;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+ }
+ else if (state != 2 && state != 3)
goto err_label;
state = 3;
@@ -1313,7 +1603,24 @@ error while adding equivalent collating symbol"));
break;
}
- if (state != 2 && state != 4)
+ if (state == 1)
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+ state = 2;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+ }
+ else if (state == 3)
+ {
+ error (0, 0, _("%s: missing `reorder-end' keyword"),
+ "LC_COLLATE");
+ state = 4;
+ }
+ else if (state != 2 && state != 4)
goto err_label;
state = 5;
@@ -1490,6 +1797,23 @@ error while adding equivalent collating symbol"));
read_directions (ldfile, arg, charmap, repertoire,
collate);
}
+ break;
+ }
+ else if (was_ellipsis != tok_none)
+ {
+ /* Using the information in the `ellipsis_weight' element,
+ together with this value and the previous one, we have
+ to handle the ellipsis now. */
+ assert (state == 1);
+
+ handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire,
+ collate);
+
+ /* Remember that we processed the ellipsis. */
+ was_ellipsis = tok_none;
+
+ /* And don't add the value a second time. */
+ break;
}
/* Now insert in the new place. */
@@ -1508,37 +1832,48 @@ error while adding equivalent collating symbol"));
if (state != 1)
goto err_label;
+ if (was_ellipsis != tok_none)
+ {
+ lr_error (ldfile,
+ _("%s: cannot have `%s' as end of ellipsis range"),
+ "LC_COLLATE", "UNDEFINED");
+
+ unlink_element (collate);
+ was_ellipsis = tok_none;
+ }
+
/* See whether UNDEFINED already appeared somewhere. */
if (collate->undefined.next != NULL
|| (collate->cursor != NULL
&& collate->undefined.next == collate->cursor))
{
- lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"),
- 9, "UNDEFINED", collate->undefined.file,
+ lr_error (ldfile,
+ _("%s: order for `%.*s' already defined at %s:%Z"),
+ "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file,
collate->undefined.line);
lr_ignore_rest (ldfile, 0);
}
else
/* Parse the weights. */
insert_weights (ldfile, &collate->undefined, charmap,
- repertoire, collate);
+ repertoire, collate, tok_none);
break;
+ case tok_ellipsis2:
case tok_ellipsis3:
- /* Ignore the rest of the line if we don't need the input of
- this line. */
- if (ignore_content)
- {
- lr_ignore_rest (ldfile, 0);
- break;
- }
+ case tok_ellipsis4:
+ /* This is the symbolic (decimal or hexadecimal) or absolute
+ ellipsis. */
+ if (was_ellipsis != tok_none)
+ goto err_label;
if (state != 1 && state != 3)
goto err_label;
- was_ellipsis = 1;
- /* XXX Read the remainder of the line and remember what are
- the weights. */
+ was_ellipsis = nowtok;
+
+ insert_weights (ldfile, &collate->ellipsis_weight, charmap,
+ repertoire, collate, nowtok);
break;
case tok_end:
@@ -1552,8 +1887,15 @@ error while adding equivalent collating symbol"));
_("%s: empty category description not allowed"),
"LC_COLLATE");
else if (state == 1)
- lr_error (ldfile, _("%s: missing `order_end' keyword"),
- "LC_COLLATE");
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+ }
else if (state == 3)
error (0, 0, _("%s: missing `reorder-end' keyword"),
"LC_COLLATE");
@@ -1585,2179 +1927,3 @@ error while adding equivalent collating symbol"));
/* When we come here we reached the end of the file. */
lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
}
-
-
-#if 0
-
-/* What kind of symbols get defined? */
-enum coll_symbol
-{
- undefined,
- ellipsis,
- character,
- element,
- symbol
-};
-
-
-typedef struct patch_t
-{
- const char *fname;
- size_t lineno;
- const char *token;
- union
- {
- unsigned int *pos;
- size_t idx;
- } where;
- struct patch_t *next;
-} patch_t;
-
-
-typedef struct element_t
-{
- const char *namemb;
- const uint32_t *namewc;
- unsigned int this_weight;
-
- struct element_t *next;
-
- unsigned int *ordering;
- size_t ordering_len;
-} element_t;
-
-
-/* The real definition of the struct for the LC_COLLATE locale. */
-struct locale_collate_t
-{
- /* Collate symbol table. Simple mapping to number. */
- hash_table symbols;
-
- /* The collation elements. */
- hash_table elements;
- struct obstack element_mem;
-
- /* The result tables. */
- hash_table resultmb;
- hash_table resultwc;
-
- /* Sorting rules given in order_start line. */
- uint32_t nrules;
- enum coll_sort_rule *rules;
-
- /* Used while recognizing symbol composed of multiple tokens
- (collating-element). */
- const char *combine_token;
- size_t combine_token_len;
-
- /* How many sorting order specifications so far. */
- unsigned int order_cnt;
-
- /* Was lastline ellipsis? */
- int was_ellipsis;
- /* Value of last entry if was character. */
- uint32_t last_char;
- /* Current element. */
- element_t *current_element;
- /* What kind of symbol is current element. */
- enum coll_symbol kind;
-
- /* Patch lists. */
- patch_t *current_patch;
- patch_t *all_patches;
-
- /* Room for the UNDEFINED information. */
- element_t undefined;
- unsigned int undefined_len;
-
- /* Script information. */
- const char **scripts;
- unsigned int nscripts;
-};
-
-
-/* Be verbose? Defined in localedef.c. */
-extern int verbose;
-
-
-
-#define obstack_chunk_alloc malloc
-#define obstack_chunk_free free
-
-
-/* Prototypes for local functions. */
-static void collate_startup (struct linereader *ldfil