diff options
| author | Ulrich Drepper <drepper@redhat.com> | 2002-09-28 05:28:44 +0000 |
|---|---|---|
| committer | Ulrich Drepper <drepper@redhat.com> | 2002-09-28 05:28:44 +0000 |
| commit | 0742e48e18a42177d1db91d7ef88967deb544051 (patch) | |
| tree | bca5b8710203c602487926a8eb40fa1815c438b7 | |
| parent | 0e312a828297d11d5eee354bbf8a564c6f12c0d4 (diff) | |
| download | glibc-0742e48e18a42177d1db91d7ef88967deb544051.tar.xz glibc-0742e48e18a42177d1db91d7ef88967deb544051.zip | |
Update.
2002-09-27 Ulrich Drepper <drepper@redhat.com>
* locales/zh_TW: Use shorter forms for abday and day.
Patch by Rex Tsai <chihchun@kalug.linux.org.tw>.
| -rw-r--r-- | localedata/ChangeLog | 5 | ||||
| -rw-r--r-- | localedata/locales/zh_TW | 20 | ||||
| -rw-r--r-- | posix/regcomp.c | 29 | ||||
| -rw-r--r-- | posix/regex_internal.c | 48 | ||||
| -rw-r--r-- | posix/regex_internal.h | 29 | ||||
| -rw-r--r-- | posix/regexec.c | 1012 | ||||
| -rw-r--r-- | sysdeps/hppa/dl-machine.h | 48 |
7 files changed, 880 insertions, 311 deletions
diff --git a/localedata/ChangeLog b/localedata/ChangeLog index bed3cef764..ffdd5ba45d 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,3 +1,8 @@ +2002-09-27 Ulrich Drepper <drepper@redhat.com> + + * locales/zh_TW: Use shorter forms for abday and day. + Patch by Rex Tsai <chihchun@kalug.linux.org.tw>. + 2002-09-23 Roland McGrath <roland@redhat.com> * tst-xlocale1.c (main): int -> size_t for counter. diff --git a/localedata/locales/zh_TW b/localedata/locales/zh_TW index 5dc9ca65f7..aaeaaa2f89 100644 --- a/localedata/locales/zh_TW +++ b/localedata/locales/zh_TW @@ -82,16 +82,16 @@ END LC_NUMERIC LC_TIME % day: Sun, Mon, Tue, Wed, Thr, Fri, Sat -abday "<U9031><U65E5>";"<U9031><U4E00>";"<U9031><U4E8C>";"<U9031><U4E09>";/ - "<U9031><U56DB>";"<U9031><U4E94>";"<U9031><U516D>" - -day "<U661F><U671F><U65E5>";/ - "<U661F><U671F><U4E00>";/ - "<U661F><U671F><U4E8C>";/ - "<U661F><U671F><U4E09>";/ - "<U661F><U671F><U56DB>";/ - "<U661F><U671F><U4E94>";/ - "<U661F><U671F><U516D>" +abday "<U65E5>";"<U4E00>";"<U4E8C>";"<U4E09>";/ + "<U56DB>";"<U4E94>";"<U516D>" + +day "<U9031><U65E5>";/ + "<U9031><U4E00>";/ + "<U9031><U4E8C>";/ + "<U9031><U4E09>";/ + "<U9031><U56DB>";/ + "<U9031><U4E94>";/ + "<U9031><U516D>" % month: Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec. abmon "<U0020><U0031><U6708>";"<U0020><U0032><U6708>";/ diff --git a/posix/regcomp.c b/posix/regcomp.c index 258e255b34..7917c64727 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -636,6 +636,9 @@ regfree (preg) if (dfa->word_char != NULL) re_free (dfa->word_char); +#ifdef DEBUG + re_free (dfa->re_str); +#endif re_free (dfa); } re_free (preg->fastmap); @@ -740,6 +743,10 @@ re_compile_internal (preg, pattern, length, syntax) preg->buffer = NULL; return err; } +#ifdef DEBUG + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif err = re_string_construct (®exp, pattern, length, preg->translate, syntax & RE_ICASE); @@ -874,6 +881,25 @@ create_initial_state (dfa) { int node_idx = init_nodes.elems[i]; re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + int entity = (type != OP_CONTEXT_NODE ? node_idx + : dfa->nodes[node_idx].opr.ctx_info->entity); + if ((type != OP_CONTEXT_NODE + || (dfa->nodes[entity].type != OP_BACK_REF)) + && (type != OP_BACK_REF)) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx + 1 == dfa->nodes[entity].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + if (type == OP_CONTEXT_NODE && (dfa->nodes[dfa->nodes[node_idx].opr.ctx_info->entity].type == OP_BACK_REF)) @@ -1816,6 +1842,7 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err) tree = create_tree (tree, branch, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) return *err = REG_ESPACE, NULL; + dfa->has_plural_match = 1; } return tree; } @@ -2035,6 +2062,7 @@ parse_expression (regexp, preg, token, syntax, nest, err) tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; + dfa->has_plural_match = 1; } return tree; @@ -2919,6 +2947,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) if (BE (new_idx == -1 || mbc_tree == NULL, 0)) goto parse_bracket_exp_espace; /* Then join them by ALT node. */ + dfa->has_plural_match = 1; alt_token.type = OP_ALT; new_idx = re_dfa_add_node (dfa, alt_token, 0); work_tree = create_tree (work_tree, mbc_tree, 0, new_idx); diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 116543a6da..52540dcea6 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -620,50 +620,6 @@ re_node_set_init_copy (dest, src) return REG_NOERROR; } -/* Calculate the intersection of the sets SRC1 and SRC2. And store it in - DEST. Return value indicate the error code or REG_NOERROR if succeeded. - Note: We assume dest->elems is NULL, when dest->alloc is 0. */ - -static reg_errcode_t -re_node_set_intersect (dest, src1, src2) - re_node_set *dest; - const re_node_set *src1, *src2; -{ - int i1, i2, id; - if (src1->nelem > 0 && src2->nelem > 0) - { - if (src1->nelem + src2->nelem > dest->alloc) - { - dest->alloc = src1->nelem + src2->nelem; - dest->elems = re_realloc (dest->elems, int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; - } - } - else - { - /* The intersection of empty sets is also empty set. */ - dest->nelem = 0; - return REG_NOERROR; - } - - for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; ) - { - if (src1->elems[i1] > src2->elems[i2]) - { - ++i2; - continue; - } - /* The intersection must have the elements which are in both of - SRC1 and SRC2. */ - if (src1->elems[i1] == src2->elems[i2]) - dest->elems[id++] = src2->elems[i2++]; - ++i1; - } - dest->nelem = id; - return REG_NOERROR; -} - /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to DEST. Return value indicate the error code or REG_NOERROR if succeeded. Note: We assume dest->elems is NULL, when dest->alloc is 0. */ @@ -912,7 +868,7 @@ re_node_set_compare (set1, set2) return 1; } -/* Return 1 if SET contains the element ELEM, return 0 otherwise. */ +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ static int re_node_set_contains (set, elem) @@ -934,7 +890,7 @@ re_node_set_contains (set, elem) else right = mid; } - return set->elems[idx] == elem; + return set->elems[idx] == elem ? idx + 1 : 0; } static void diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 9f1f9826f2..cc6584561c 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -407,8 +407,9 @@ struct re_state_table_entry struct re_backref_cache_entry { int node; - int from; - int to; + int str_idx; + int subexp_from; + int subexp_to; int flag; }; @@ -427,9 +428,24 @@ typedef struct int nbkref_ents; int abkref_ents; struct re_backref_cache_entry *bkref_ents; - int max_bkref_len; + int max_mb_elem_len; } re_match_context_t; +typedef struct +{ + int cur_bkref; + int cls_subexp_idx; + + re_dfastate_t **sifted_states; + re_dfastate_t **limited_states; + + re_node_set limits; + + int last_node; + int last_str_idx; + int check_subexp; +} re_sift_context_t; + struct re_dfa_t { re_bitset_ptr_t word_char; @@ -459,6 +475,10 @@ struct re_dfa_t /* If this dfa has "multibyte node", which is a backreference or a node which can accept multibyte character or multi character collating element. */ +#ifdef DEBUG + char* re_str; +#endif + unsigned int has_plural_match : 1; unsigned int has_mb_node : 1; }; typedef struct re_dfa_t re_dfa_t; @@ -470,9 +490,6 @@ static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1, #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) static reg_errcode_t re_node_set_init_copy (re_node_set *dest, const re_node_set *src); -static reg_errcode_t re_node_set_intersect (re_node_set *dest, - const re_node_set *src1, - const re_node_set *src2); static reg_errcode_t re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, const re_node_set *src2); diff --git a/posix/regexec.c b/posix/regexec.c index 142127883d..88b20dd31f 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -47,7 +47,11 @@ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, re_string_t *input, int n); static void match_ctx_free (re_match_context_t *cache); static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, - int from, int to); + int str_idx, int from, int to); +static void match_ctx_clear_flag (re_match_context_t *mctx); +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx, int check_subexp); static reg_errcode_t re_search_internal (const regex_t *preg, const char *string, int length, int start, int range, int stop, @@ -77,7 +81,7 @@ static int check_halt_state_context (const regex_t *preg, const re_match_context_t *mctx, int idx); static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node, int cur_idx, int nmatch); -static int proceed_next_node (const regex_t *preg, +static int proceed_next_node (const regex_t *preg, regmatch_t *regs, const re_match_context_t *mctx, int *pidx, int node, re_node_set *eps_via_nodes); static reg_errcode_t set_regs (const regex_t *preg, @@ -86,22 +90,41 @@ static reg_errcode_t set_regs (const regex_t *preg, #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const regex_t *preg, const re_match_context_t *mctx, + re_sift_context_t *sctx, int node_idx, int str_idx, int max_str_idx); #endif /* RE_ENABLE_I18N */ -static int sift_states_iter_bkref (const re_dfa_t *dfa, - re_dfastate_t **state_log, - struct re_backref_cache_entry *mctx_entry, - int node_idx, int idx, int match_last); static reg_errcode_t sift_states_backward (const regex_t *preg, - const re_match_context_t *mctx, - int last_node); + re_match_context_t *mctx, + re_sift_context_t *sctx); +static reg_errcode_t update_cur_sifted_state (const regex_t *preg, + re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes); +static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates); +static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node, + re_node_set *dest_nodes, + const re_node_set *and_nodes); +static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx); +static reg_errcode_t search_subexp (const regex_t *preg, + re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes); +static reg_errcode_t sift_states_bkref (const regex_t *preg, + re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, re_node_set *dest_nodes); static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx, int next_state_log_idx); -static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa, - const re_match_context_t *mctx, - const re_node_set *plog, - int idx, - re_node_set *state_buf); +static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num); static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg, re_match_context_t *mctx, re_dfastate_t *state, int fl_search); @@ -633,7 +656,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, { /* It seems to be appropriate one, then use the matcher. */ /* We assume that the matching starts from 0. */ - mctx.state_log_top = mctx.nbkref_ents = mctx.max_bkref_len = 0; + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; match_last = check_matching (preg, &mctx, 0, fl_longest_match); if (match_last != -1) { @@ -667,15 +690,45 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, { /* We need the ranges of all the subexpressions. */ int halt_node; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; re_dfastate_t *pstate = mctx.state_log[match_last]; + re_sift_context_t sctx; #ifdef DEBUG assert (mctx.state_log != NULL); #endif halt_node = check_halt_state_context (preg, pstate, &mctx, match_last); - err = sift_states_backward (preg, &mctx, halt_node); - if (BE (err != REG_NOERROR, 0)) - return err; + if (dfa->has_plural_match) + { + match_ctx_clear_flag (&mctx); + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + return REG_ESPACE; + if (dfa->nbackref) + { + lim_states = calloc (sizeof (re_dfastate_t *), + match_last + 1); + if (BE (lim_states == NULL, 0)) + return REG_ESPACE; + } + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + mctx.match_last, 0); + err = sift_states_backward (preg, &mctx, &sctx); + if (BE (err != REG_NOERROR, 0)) + return err; + if (lim_states != NULL) + { + err = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + re_free (lim_states); + } + re_node_set_free (&sctx.limits); + re_free (mctx.state_log); + mctx.state_log = sifted_states; + } err = set_regs (preg, &mctx, nmatch, pmatch, halt_node); if (BE (err != REG_NOERROR, 0)) return err; @@ -695,6 +748,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, if (dfa->nbackref) match_ctx_free (&mctx); re_string_destruct (&input); + return (match_last == -1) ? REG_NOMATCH : REG_NOERROR; } @@ -766,6 +820,39 @@ check_matching (preg, mctx, fl_search, fl_longest_match) return -2; if (mctx->state_log != NULL) mctx->state_log[cur_str_idx] = cur_state; + + if (cur_state->has_backref) + { + int i; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + for (i = 0; i < cur_state->nodes.nelem; ++i) + { + re_token_type_t type; + int node = cur_state->nodes.elems[i]; + int entity = (dfa->nodes[node].type != OP_CONTEXT_NODE ? node + : dfa->nodes[node].opr.ctx_info->entity); + type = dfa->nodes[entity].type; + if (type == OP_BACK_REF) + { + int clexp_idx; + for (clexp_idx = 0; clexp_idx < cur_state->nodes.nelem; + ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx + 1== dfa->nodes[entity].opr.idx) + { + err = match_ctx_add_entry (mctx, node, 0, 0, 0); + if (BE (err != REG_NOERROR, 0)) + return -2; + break; + } + } + } + } + } + /* If the RE accepts NULL string. */ if (cur_state->halt) { @@ -894,8 +981,9 @@ check_halt_state_context (preg, state, mctx, idx) of errors. */ static int -proceed_next_node (preg, mctx, pidx, node, eps_via_nodes) +proceed_next_node (preg, regs, mctx, pidx, node, eps_via_nodes) const regex_t *preg; + regmatch_t *regs; const re_match_context_t *mctx; int *pidx, node; re_node_set *eps_via_nodes; @@ -956,12 +1044,9 @@ proceed_next_node (preg, mctx, pidx, node, eps_via_nodes) #endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) { - for (i = 0; i < mctx->nbkref_ents; ++i) - { - if (mctx->bkref_ents[i].node == node - && mctx->bkref_ents[i].from == *pidx) - naccepted = mctx->bkref_ents[i].to - *pidx; - } + int subexp_idx = dfa->nodes[entity].opr.idx; + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (naccepted == 0) { err = re_node_set_insert (eps_via_nodes, node); @@ -1031,7 +1116,8 @@ set_regs (preg, mctx, nmatch, pmatch, last_node) break; /* Proceed to next node. */ - cur_node = proceed_next_node (preg, mctx, &idx, cur_node, &eps_via_nodes); + cur_node = proceed_next_node (preg, pmatch, mctx, &idx, cur_node, + &eps_via_nodes); if (BE (cur_node < 0, 0)) return REG_ESPACE; } @@ -1061,11 +1147,11 @@ update_regs (dfa, pmatch, cur_node, cur_idx, nmatch) else if (type == OP_CLOSE_SUBEXP) /* We are at the first node of this sub expression. */ pmatch[reg_num].rm_eo = cur_idx; - } +} #define NUMBER_OF_STATE 1 -/* This function checks the STATE_LOG from the MCTX->match_last to 0 +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 and sift the nodes in each states according to the following rules. Updated state_log will be wrote to STATE_LOG. @@ -1089,124 +1175,104 @@ update_regs (dfa, pmatch, cur_node, cur_idx, nmatch) ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) static reg_errcode_t -sift_states_backward (preg, mctx, last_node) - const regex_t *preg; - const re_match_context_t *mctx; - int last_node; +sift_states_backward (preg, mctx, sctx) + const regex_t *preg; + re_match_context_t *mctx; + re_sift_context_t *sctx; { reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - re_node_set state_buf; - int str_idx = mctx->match_last; - re_node_set *plog; /* Points the state_log[str_idx]->nodes */ + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */ #ifdef DEBUG assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); #endif - err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE); - if (BE (err != REG_NOERROR, 0)) - return err; - plog = &mctx->state_log[str_idx]->nodes; + cur_src = &mctx->state_log[str_idx]->nodes; /* Build sifted state_log[str_idx]. It has the nodes which can epsilon transit to the last_node and the last_node itself. */ - err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node); + err = re_node_set_init_1 (&cur_dest, sctx->last_node); if (BE (err != REG_NOERROR, 0)) return err; - - if (mctx->state_log[str_idx] != NULL - && mctx->state_log[str_idx]->has_backref) - { - err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Update state log. */ - mctx->state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf); - if (BE (mctx->state_log[str_idx] == NULL && err != REG_NOERROR, 0)) + err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) return err; /* Then check each states in the state_log. */ while (str_idx > 0) { - int i, j; + int i, ret; /* Update counters. */ - re_node_set_empty (&state_buf); + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); --str_idx; - plog = ((mctx->state_log[str_idx] == NULL) ? &empty_set - : &mctx->state_log[str_idx]->nodes); + cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set + : &mctx->state_log[str_idx]->nodes); /* Then build the next sifted state. - We build the next sifted state on `state_buf', and update - `state_log[str_idx]' with `state_buf'. + We build the next sifted state on `cur_dest', and update + `sifted_st |
