aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2002-09-28 05:28:44 +0000
committerUlrich Drepper <drepper@redhat.com>2002-09-28 05:28:44 +0000
commit0742e48e18a42177d1db91d7ef88967deb544051 (patch)
treebca5b8710203c602487926a8eb40fa1815c438b7
parent0e312a828297d11d5eee354bbf8a564c6f12c0d4 (diff)
downloadglibc-0742e48e18a42177d1db91d7ef88967deb544051.tar.xz
glibc-0742e48e18a42177d1db91d7ef88967deb544051.zip
Update.
2002-09-27 Ulrich Drepper <drepper@redhat.com> * locales/zh_TW: Use shorter forms for abday and day. Patch by Rex Tsai <chihchun@kalug.linux.org.tw>.
-rw-r--r--localedata/ChangeLog5
-rw-r--r--localedata/locales/zh_TW20
-rw-r--r--posix/regcomp.c29
-rw-r--r--posix/regex_internal.c48
-rw-r--r--posix/regex_internal.h29
-rw-r--r--posix/regexec.c1012
-rw-r--r--sysdeps/hppa/dl-machine.h48
7 files changed, 880 insertions, 311 deletions
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index bed3cef764..ffdd5ba45d 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,8 @@
+2002-09-27 Ulrich Drepper <drepper@redhat.com>
+
+ * locales/zh_TW: Use shorter forms for abday and day.
+ Patch by Rex Tsai <chihchun@kalug.linux.org.tw>.
+
2002-09-23 Roland McGrath <roland@redhat.com>
* tst-xlocale1.c (main): int -> size_t for counter.
diff --git a/localedata/locales/zh_TW b/localedata/locales/zh_TW
index 5dc9ca65f7..aaeaaa2f89 100644
--- a/localedata/locales/zh_TW
+++ b/localedata/locales/zh_TW
@@ -82,16 +82,16 @@ END LC_NUMERIC
LC_TIME
% day: Sun, Mon, Tue, Wed, Thr, Fri, Sat
-abday "<U9031><U65E5>";"<U9031><U4E00>";"<U9031><U4E8C>";"<U9031><U4E09>";/
- "<U9031><U56DB>";"<U9031><U4E94>";"<U9031><U516D>"
-
-day "<U661F><U671F><U65E5>";/
- "<U661F><U671F><U4E00>";/
- "<U661F><U671F><U4E8C>";/
- "<U661F><U671F><U4E09>";/
- "<U661F><U671F><U56DB>";/
- "<U661F><U671F><U4E94>";/
- "<U661F><U671F><U516D>"
+abday "<U65E5>";"<U4E00>";"<U4E8C>";"<U4E09>";/
+ "<U56DB>";"<U4E94>";"<U516D>"
+
+day "<U9031><U65E5>";/
+ "<U9031><U4E00>";/
+ "<U9031><U4E8C>";/
+ "<U9031><U4E09>";/
+ "<U9031><U56DB>";/
+ "<U9031><U4E94>";/
+ "<U9031><U516D>"
% month: Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec.
abmon "<U0020><U0031><U6708>";"<U0020><U0032><U6708>";/
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 258e255b34..7917c64727 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -636,6 +636,9 @@ regfree (preg)
if (dfa->word_char != NULL)
re_free (dfa->word_char);
+#ifdef DEBUG
+ re_free (dfa->re_str);
+#endif
re_free (dfa);
}
re_free (preg->fastmap);
@@ -740,6 +743,10 @@ re_compile_internal (preg, pattern, length, syntax)
preg->buffer = NULL;
return err;
}
+#ifdef DEBUG
+ dfa->re_str = re_malloc (char, length + 1);
+ strncpy (dfa->re_str, pattern, length + 1);
+#endif
err = re_string_construct (&regexp, pattern, length, preg->translate,
syntax & RE_ICASE);
@@ -874,6 +881,25 @@ create_initial_state (dfa)
{
int node_idx = init_nodes.elems[i];
re_token_type_t type = dfa->nodes[node_idx].type;
+
+ int clexp_idx;
+ int entity = (type != OP_CONTEXT_NODE ? node_idx
+ : dfa->nodes[node_idx].opr.ctx_info->entity);
+ if ((type != OP_CONTEXT_NODE
+ || (dfa->nodes[entity].type != OP_BACK_REF))
+ && (type != OP_BACK_REF))
+ continue;
+ for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+ {
+ re_token_t *clexp_node;
+ clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+ if (clexp_node->type == OP_CLOSE_SUBEXP
+ && clexp_node->opr.idx + 1 == dfa->nodes[entity].opr.idx)
+ break;
+ }
+ if (clexp_idx == init_nodes.nelem)
+ continue;
+
if (type == OP_CONTEXT_NODE
&& (dfa->nodes[dfa->nodes[node_idx].opr.ctx_info->entity].type
== OP_BACK_REF))
@@ -1816,6 +1842,7 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
tree = create_tree (tree, branch, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
+ dfa->has_plural_match = 1;
}
return tree;
}
@@ -2035,6 +2062,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
+ dfa->has_plural_match = 1;
}
return tree;
@@ -2919,6 +2947,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
if (BE (new_idx == -1 || mbc_tree == NULL, 0))
goto parse_bracket_exp_espace;
/* Then join them by ALT node. */
+ dfa->has_plural_match = 1;
alt_token.type = OP_ALT;
new_idx = re_dfa_add_node (dfa, alt_token, 0);
work_tree = create_tree (work_tree, mbc_tree, 0, new_idx);
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 116543a6da..52540dcea6 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -620,50 +620,6 @@ re_node_set_init_copy (dest, src)
return REG_NOERROR;
}
-/* Calculate the intersection of the sets SRC1 and SRC2. And store it in
- DEST. Return value indicate the error code or REG_NOERROR if succeeded.
- Note: We assume dest->elems is NULL, when dest->alloc is 0. */
-
-static reg_errcode_t
-re_node_set_intersect (dest, src1, src2)
- re_node_set *dest;
- const re_node_set *src1, *src2;
-{
- int i1, i2, id;
- if (src1->nelem > 0 && src2->nelem > 0)
- {
- if (src1->nelem + src2->nelem > dest->alloc)
- {
- dest->alloc = src1->nelem + src2->nelem;
- dest->elems = re_realloc (dest->elems, int, dest->alloc);
- if (BE (dest->elems == NULL, 0))
- return REG_ESPACE;
- }
- }
- else
- {
- /* The intersection of empty sets is also empty set. */
- dest->nelem = 0;
- return REG_NOERROR;
- }
-
- for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; )
- {
- if (src1->elems[i1] > src2->elems[i2])
- {
- ++i2;
- continue;
- }
- /* The intersection must have the elements which are in both of
- SRC1 and SRC2. */
- if (src1->elems[i1] == src2->elems[i2])
- dest->elems[id++] = src2->elems[i2++];
- ++i1;
- }
- dest->nelem = id;
- return REG_NOERROR;
-}
-
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
@@ -912,7 +868,7 @@ re_node_set_compare (set1, set2)
return 1;
}
-/* Return 1 if SET contains the element ELEM, return 0 otherwise. */
+/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
static int
re_node_set_contains (set, elem)
@@ -934,7 +890,7 @@ re_node_set_contains (set, elem)
else
right = mid;
}
- return set->elems[idx] == elem;
+ return set->elems[idx] == elem ? idx + 1 : 0;
}
static void
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 9f1f9826f2..cc6584561c 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -407,8 +407,9 @@ struct re_state_table_entry
struct re_backref_cache_entry
{
int node;
- int from;
- int to;
+ int str_idx;
+ int subexp_from;
+ int subexp_to;
int flag;
};
@@ -427,9 +428,24 @@ typedef struct
int nbkref_ents;
int abkref_ents;
struct re_backref_cache_entry *bkref_ents;
- int max_bkref_len;
+ int max_mb_elem_len;
} re_match_context_t;
+typedef struct
+{
+ int cur_bkref;
+ int cls_subexp_idx;
+
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **limited_states;
+
+ re_node_set limits;
+
+ int last_node;
+ int last_str_idx;
+ int check_subexp;
+} re_sift_context_t;
+
struct re_dfa_t
{
re_bitset_ptr_t word_char;
@@ -459,6 +475,10 @@ struct re_dfa_t
/* If this dfa has "multibyte node", which is a backreference or
a node which can accept multibyte character or multi character
collating element. */
+#ifdef DEBUG
+ char* re_str;
+#endif
+ unsigned int has_plural_match : 1;
unsigned int has_mb_node : 1;
};
typedef struct re_dfa_t re_dfa_t;
@@ -470,9 +490,6 @@ static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
const re_node_set *src);
-static reg_errcode_t re_node_set_intersect (re_node_set *dest,
- const re_node_set *src1,
- const re_node_set *src2);
static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
const re_node_set *src1,
const re_node_set *src2);
diff --git a/posix/regexec.c b/posix/regexec.c
index 142127883d..88b20dd31f 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -47,7 +47,11 @@ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
re_string_t *input, int n);
static void match_ctx_free (re_match_context_t *cache);
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
- int from, int to);
+ int str_idx, int from, int to);
+static void match_ctx_clear_flag (re_match_context_t *mctx);
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node,
+ int last_str_idx, int check_subexp);
static reg_errcode_t re_search_internal (const regex_t *preg,
const char *string, int length,
int start, int range, int stop,
@@ -77,7 +81,7 @@ static int check_halt_state_context (const regex_t *preg,
const re_match_context_t *mctx, int idx);
static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
int cur_idx, int nmatch);
-static int proceed_next_node (const regex_t *preg,
+static int proceed_next_node (const regex_t *preg, regmatch_t *regs,
const re_match_context_t *mctx,
int *pidx, int node, re_node_set *eps_via_nodes);
static reg_errcode_t set_regs (const regex_t *preg,
@@ -86,22 +90,41 @@ static reg_errcode_t set_regs (const regex_t *preg,
#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const regex_t *preg,
const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
int node_idx, int str_idx, int max_str_idx);
#endif /* RE_ENABLE_I18N */
-static int sift_states_iter_bkref (const re_dfa_t *dfa,
- re_dfastate_t **state_log,
- struct re_backref_cache_entry *mctx_entry,
- int node_idx, int idx, int match_last);
static reg_errcode_t sift_states_backward (const regex_t *preg,
- const re_match_context_t *mctx,
- int last_node);
+ re_match_context_t *mctx,
+ re_sift_context_t *sctx);
+static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx,
+ re_node_set *dest_nodes);
+static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates);
+static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
+ re_node_set *dest_nodes,
+ const re_node_set *and_nodes);
+static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates,
+ re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents,
+ int str_idx);
+static reg_errcode_t search_subexp (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_sift_context_t *sctx, int str_idx,
+ re_node_set *dest_nodes);
+static reg_errcode_t sift_states_bkref (const regex_t *preg,
+ re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx, re_node_set *dest_nodes);
static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
int next_state_log_idx);
-static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa,
- const re_match_context_t *mctx,
- const re_node_set *plog,
- int idx,
- re_node_set *state_buf);
+static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
+ re_dfastate_t **src, int num);
static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
re_match_context_t *mctx,
re_dfastate_t *state, int fl_search);
@@ -633,7 +656,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
{
/* It seems to be appropriate one, then use the matcher. */
/* We assume that the matching starts from 0. */
- mctx.state_log_top = mctx.nbkref_ents = mctx.max_bkref_len = 0;
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
match_last = check_matching (preg, &mctx, 0, fl_longest_match);
if (match_last != -1)
{
@@ -667,15 +690,45 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
{
/* We need the ranges of all the subexpressions. */
int halt_node;
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **lim_states = NULL;
re_dfastate_t *pstate = mctx.state_log[match_last];
+ re_sift_context_t sctx;
#ifdef DEBUG
assert (mctx.state_log != NULL);
#endif
halt_node = check_halt_state_context (preg, pstate, &mctx,
match_last);
- err = sift_states_backward (preg, &mctx, halt_node);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ if (dfa->has_plural_match)
+ {
+ match_ctx_clear_flag (&mctx);
+ sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (sifted_states == NULL, 0))
+ return REG_ESPACE;
+ if (dfa->nbackref)
+ {
+ lim_states = calloc (sizeof (re_dfastate_t *),
+ match_last + 1);
+ if (BE (lim_states == NULL, 0))
+ return REG_ESPACE;
+ }
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+ mctx.match_last, 0);
+ err = sift_states_backward (preg, &mctx, &sctx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ if (lim_states != NULL)
+ {
+ err = merge_state_array (dfa, sifted_states, lim_states,
+ match_last + 1);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ re_free (lim_states);
+ }
+ re_node_set_free (&sctx.limits);
+ re_free (mctx.state_log);
+ mctx.state_log = sifted_states;
+ }
err = set_regs (preg, &mctx, nmatch, pmatch, halt_node);
if (BE (err != REG_NOERROR, 0))
return err;
@@ -695,6 +748,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
if (dfa->nbackref)
match_ctx_free (&mctx);
re_string_destruct (&input);
+
return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
}
@@ -766,6 +820,39 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
return -2;
if (mctx->state_log != NULL)
mctx->state_log[cur_str_idx] = cur_state;
+
+ if (cur_state->has_backref)
+ {
+ int i;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ for (i = 0; i < cur_state->nodes.nelem; ++i)
+ {
+ re_token_type_t type;
+ int node = cur_state->nodes.elems[i];
+ int entity = (dfa->nodes[node].type != OP_CONTEXT_NODE ? node
+ : dfa->nodes[node].opr.ctx_info->entity);
+ type = dfa->nodes[entity].type;
+ if (type == OP_BACK_REF)
+ {
+ int clexp_idx;
+ for (clexp_idx = 0; clexp_idx < cur_state->nodes.nelem;
+ ++clexp_idx)
+ {
+ re_token_t *clexp_node;
+ clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx];
+ if (clexp_node->type == OP_CLOSE_SUBEXP
+ && clexp_node->opr.idx + 1== dfa->nodes[entity].opr.idx)
+ {
+ err = match_ctx_add_entry (mctx, node, 0, 0, 0);
+ if (BE (err != REG_NOERROR, 0))
+ return -2;
+ break;
+ }
+ }
+ }
+ }
+ }
+
/* If the RE accepts NULL string. */
if (cur_state->halt)
{
@@ -894,8 +981,9 @@ check_halt_state_context (preg, state, mctx, idx)
of errors. */
static int
-proceed_next_node (preg, mctx, pidx, node, eps_via_nodes)
+proceed_next_node (preg, regs, mctx, pidx, node, eps_via_nodes)
const regex_t *preg;
+ regmatch_t *regs;
const re_match_context_t *mctx;
int *pidx, node;
re_node_set *eps_via_nodes;
@@ -956,12 +1044,9 @@ proceed_next_node (preg, mctx, pidx, node, eps_via_nodes)
#endif /* RE_ENABLE_I18N */
if (type == OP_BACK_REF)
{
- for (i = 0; i < mctx->nbkref_ents; ++i)
- {
- if (mctx->bkref_ents[i].node == node
- && mctx->bkref_ents[i].from == *pidx)
- naccepted = mctx->bkref_ents[i].to - *pidx;
- }
+ int subexp_idx = dfa->nodes[entity].opr.idx;
+ naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+
if (naccepted == 0)
{
err = re_node_set_insert (eps_via_nodes, node);
@@ -1031,7 +1116,8 @@ set_regs (preg, mctx, nmatch, pmatch, last_node)
break;
/* Proceed to next node. */
- cur_node = proceed_next_node (preg, mctx, &idx, cur_node, &eps_via_nodes);
+ cur_node = proceed_next_node (preg, pmatch, mctx, &idx, cur_node,
+ &eps_via_nodes);
if (BE (cur_node < 0, 0))
return REG_ESPACE;
}
@@ -1061,11 +1147,11 @@ update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
else if (type == OP_CLOSE_SUBEXP)
/* We are at the first node of this sub expression. */
pmatch[reg_num].rm_eo = cur_idx;
- }
+}
#define NUMBER_OF_STATE 1
-/* This function checks the STATE_LOG from the MCTX->match_last to 0
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
and sift the nodes in each states according to the following rules.
Updated state_log will be wrote to STATE_LOG.
@@ -1089,124 +1175,104 @@ update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
static reg_errcode_t
-sift_states_backward (preg, mctx, last_node)
- const regex_t *preg;
- const re_match_context_t *mctx;
- int last_node;
+sift_states_backward (preg, mctx, sctx)
+ const regex_t *preg;
+ re_match_context_t *mctx;
+ re_sift_context_t *sctx;
{
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- re_node_set state_buf;
- int str_idx = mctx->match_last;
- re_node_set *plog; /* Points the state_log[str_idx]->nodes */
+ int null_cnt = 0;
+ int str_idx = sctx->last_str_idx;
+ re_node_set cur_dest;
+ re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
#ifdef DEBUG
assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
#endif
- err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
- if (BE (err != REG_NOERROR, 0))
- return err;
- plog = &mctx->state_log[str_idx]->nodes;
+ cur_src = &mctx->state_log[str_idx]->nodes;
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
transit to the last_node and the last_node itself. */
- err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
+ err = re_node_set_init_1 (&cur_dest, sctx->last_node);
if (BE (err != REG_NOERROR, 0))
return err;
-
- if (mctx->state_log[str_idx] != NULL
- && mctx->state_log[str_idx]->has_backref)
- {
- err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
-
- /* Update state log. */
- mctx->state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
- if (BE (mctx->state_log[str_idx] == NULL && err != REG_NOERROR, 0))
+ err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
return err;
/* Then check each states in the state_log. */
while (str_idx > 0)
{
- int i, j;
+ int i, ret;
/* Update counters. */
- re_node_set_empty (&state_buf);
+ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+ if (null_cnt > mctx->max_mb_elem_len)
+ {
+ memset (sctx->sifted_states, '\0',
+ sizeof (re_dfastate_t *) * str_idx);
+ return REG_NOERROR;
+ }
+ re_node_set_empty (&cur_dest);
--str_idx;
- plog = ((mctx->state_log[str_idx] == NULL) ? &empty_set
- : &mctx->state_log[str_idx]->nodes);
+ cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ : &mctx->state_log[str_idx]->nodes);
/* Then build the next sifted state.
- We build the next sifted state on `state_buf', and update
- `state_log[str_idx]' with `state_buf'.
+ We build the next sifted state on `cur_dest', and update
+ `sifted_st