diff options
| author | Ulrich Drepper <drepper@redhat.com> | 2002-11-06 20:36:47 +0000 |
|---|---|---|
| committer | Ulrich Drepper <drepper@redhat.com> | 2002-11-06 20:36:47 +0000 |
| commit | 15a7d175bc5e1f869ca5874c9ee5d26377d8403f (patch) | |
| tree | b8f28abd5f40d2c157b398986bb141fe3be85089 | |
| parent | 1b2c2628354003bd97af125a164e830c9d295e3a (diff) | |
| download | glibc-15a7d175bc5e1f869ca5874c9ee5d26377d8403f.tar.xz glibc-15a7d175bc5e1f869ca5874c9ee5d26377d8403f.zip | |
Update.
2002-11-06 Ulrich Drepper <drepper@redhat.com>
* posix/regcomp.c: Use tabs instead of spaces.
* posix/regexec.c: Likewise.
* posix/regex_internal.h: Likewise.
* posix/regcomp.c (re_compile_fastmap_iter): Use __wcrtomb not wctomb.
| -rw-r--r-- | ChangeLog | 8 | ||||
| -rw-r--r-- | posix/regcomp.c | 2044 | ||||
| -rw-r--r-- | posix/regex_internal.h | 41 | ||||
| -rw-r--r-- | posix/regexec.c | 2718 |
4 files changed, 2411 insertions, 2400 deletions
@@ -1,3 +1,11 @@ +2002-11-06 Ulrich Drepper <drepper@redhat.com> + + * posix/regcomp.c: Use tabs instead of spaces. + * posix/regexec.c: Likewise. + * posix/regex_internal.h: Likewise. + + * posix/regcomp.c (re_compile_fastmap_iter): Use __wcrtomb not wctomb. + 2002-11-06 Jakub Jelinek <jakub@redhat.com> * posix/regcomp.c (re_compile_pattern): Don't set regs_allocated diff --git a/posix/regcomp.c b/posix/regcomp.c index 03d5cfce14..c9c0d9eb37 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -69,10 +69,10 @@ #include "regex_internal.h" static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, - int length, reg_syntax_t syntax); + int length, reg_syntax_t syntax); static void re_compile_fastmap_iter (regex_t *bufp, - const re_dfastate_t *init_state, - char *fastmap); + const re_dfastate_t *init_state, + char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); static reg_errcode_t init_word_char (re_dfa_t *dfa); #ifdef RE_ENABLE_I18N @@ -86,88 +86,88 @@ static void calc_first (re_dfa_t *dfa, bin_tree_t *node); static void calc_next (re_dfa_t *dfa, bin_tree_t *node); static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, - int top_clone_node, int root_node, - unsigned int constraint); + int top_clone_node, int root_node, + unsigned int constraint); static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, - unsigned int constraint); + unsigned int constraint); static reg_errcode_t calc_eclosure (re_dfa_t *dfa); static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, - int node, int root); + int node, int root); static void calc_inveclosure (re_dfa_t *dfa); static int fetch_number (re_string_t *input, re_token_t *token, - reg_syntax_t syntax); + reg_syntax_t syntax); static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax); static int peek_token (re_token_t *token, re_string_t *input, - 
reg_syntax_t syntax); + reg_syntax_t syntax); static int peek_token_bracket (re_token_t *token, re_string_t *input, - reg_syntax_t syntax); + reg_syntax_t syntax); static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, - reg_syntax_t syntax, reg_errcode_t *err); + reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, - re_dfa_t *dfa, re_token_t *token, - reg_syntax_t syntax, reg_errcode_t *err); + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, - re_token_t *token, reg_syntax_t syntax, - reg_errcode_t *err); + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, - re_string_t *regexp, - re_token_t *token, int token_len, - re_dfa_t *dfa, - reg_syntax_t syntax); + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax); static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, - re_string_t *regexp, - 
re_token_t *token); + re_string_t *regexp, + re_token_t *token); #ifndef _LIBC # ifdef RE_ENABLE_I18N static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, int *range_alloc, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); + re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, + bracket_elem_t *end_elem); static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *coll_sym_alloc, - const unsigned char *name); + re_charset_t *mbcset, + int *coll_sym_alloc, + const unsigned char *name); # else /* not RE_ENABLE_I18N */ static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); + bracket_elem_t *start_elem, + bracket_elem_t *end_elem); static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - const unsigned char *name); + const unsigned char *name); # endif /* not RE_ENABLE_I18N */ #endif /* not _LIBC */ #ifdef RE_ENABLE_I18N static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *equiv_class_alloc, - const unsigned char *name); + re_charset_t *mbcset, + int *equiv_class_alloc, + const unsigned char *name); static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *char_class_alloc, - const unsigned char *class_name, - reg_syntax_t syntax); + re_charset_t *mbcset, + int *char_class_alloc, + const unsigned char *class_name, + reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, - const unsigned char *name); + const unsigned char *name); static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, - const unsigned char *class_name, - reg_syntax_t syntax); + const unsigned char *class_name, + reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err); static void free_bin_tree (bin_tree_t 
*tree); static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right, - re_token_type_t type, int index); + re_token_type_t type, int index); static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); /* This table gives an error message for each of the error codes listed @@ -363,60 +363,62 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (type == CHARACTER) re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); else if (type == SIMPLE_BRACKET) - { - int i, j, ch; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) - re_set_fastmap (fastmap, icase, ch); - } + { + int i, j, ch; + for (i = 0, ch = 0; i < BITSET_UINTS; ++i) + for (j = 0; j < UINT_BITS; ++j, ++ch) + if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) + re_set_fastmap (fastmap, icase, ch); + } #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) - { - int i; - re_charset_t *cset = dfa->nodes[node].opr.mbcset; - if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes - || cset->nranges || cset->nchar_classes) - { + { + int i; + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes + || cset->nranges || cset->nchar_classes) + { # ifdef _LIBC - if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) - { - /* In this case we want to catch the bytes which are - the first byte of any collation elements. - e.g. In da_DK, we want to catch 'a' since "aa" - is a valid collation element, and don't catch - 'b' since 'b' is the only collation element - which starts from 'b'. 
*/ - int j, ch; - const int32_t *table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (table[ch] < 0) - re_set_fastmap (fastmap, icase, ch); - } + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) + { + /* In this case we want to catch the bytes which are + the first byte of any collation elements. + e.g. In da_DK, we want to catch 'a' since "aa" + is a valid collation element, and don't catch + 'b' since 'b' is the only collation element + which starts from 'b'. */ + int j, ch; + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0, ch = 0; i < BITSET_UINTS; ++i) + for (j = 0; j < UINT_BITS; ++j, ++ch) + if (table[ch] < 0) + re_set_fastmap (fastmap, icase, ch); + } # else - if (MB_CUR_MAX > 1) - for (i = 0; i < SBC_MAX; ++i) - if (__btowc (i) == WEOF) - re_set_fastmap (fastmap, icase, i); + if (MB_CUR_MAX > 1) + for (i = 0; i < SBC_MAX; ++i) + if (__btowc (i) == WEOF) + re_set_fastmap (fastmap, icase, i); # endif /* not _LIBC */ - } - for (i = 0; i < cset->nmbchars; ++i) - { - char buf[256]; - wctomb (buf, cset->mbchars[i]); - re_set_fastmap (fastmap, icase, *(unsigned char *) buf); - } - } + } + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + __wcrtomb (buf, cset->mbchars[i], &state); + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + } + } #endif /* RE_ENABLE_I18N */ else if (type == END_OF_RE || type == OP_PERIOD) - { - memset (fastmap, '\1', sizeof (char) * SBC_MAX); - if (type == END_OF_RE) - bufp->can_be_null = 1; - return; - } + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } } } @@ -464,7 +466,7 @@ regcomp (preg, pattern, cflags) { reg_errcode_t ret; reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? 
RE_SYNTAX_POSIX_EXTENDED - : RE_SYNTAX_POSIX_BASIC); + : RE_SYNTAX_POSIX_BASIC); preg->buffer = NULL; preg->allocated = 0; @@ -529,8 +531,8 @@ regerror (errcode, preg, errbuf, errbuf_size) size_t msg_size; if (BE (errcode < 0 - || errcode >= (int) (sizeof (__re_error_msgid_idx) - / sizeof (__re_error_msgid_idx[0])), 0)) + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) /* Only error codes returned by the rest of the code should be passed to this routine. If we are given anything else, or if other regex code generates an invalid error code, then the program has a bug. @@ -544,16 +546,16 @@ regerror (errcode, preg, errbuf, errbuf_size) if (BE (errbuf_size != 0, 1)) { if (BE (msg_size > errbuf_size, 0)) - { + { #if defined HAVE_MEMPCPY || defined _LIBC *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; #else - memcpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; #endif - } + } else - memcpy (errbuf, msg, msg_size); + memcpy (errbuf, msg, msg_size); } return msg_size; @@ -756,7 +758,7 @@ re_compile_internal (preg, pattern, length, syntax) #endif err = re_string_construct (&regexp, pattern, length, preg->translate, - syntax & RE_ICASE); + syntax & RE_ICASE); if (BE (err != REG_NOERROR, 0)) { re_free (dfa); @@ -823,7 +825,7 @@ init_dfa (dfa, pat_len) dfa->word_char = NULL; if (BE (dfa->nodes == NULL || dfa->state_table == NULL - || dfa->subexps == NULL, 0)) + || dfa->subexps == NULL, 0)) { /* We don't bother to free anything which was allocated. Very soon the process will go down anyway. 
*/ @@ -850,7 +852,7 @@ init_word_char (dfa) for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= 1 << j; + dfa->word_char[i] |= 1 << j; return REG_NOERROR; } @@ -890,32 +892,32 @@ create_initial_state (dfa) if (dfa->nbackref > 0) for (i = 0; i < init_nodes.nelem; ++i) { - int node_idx = init_nodes.elems[i]; - re_token_type_t type = dfa->nodes[node_idx].type; - - int clexp_idx; - if (type != OP_BACK_REF) - continue; - for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) - { - re_token_t *clexp_node; - clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; - if (clexp_node->type == OP_CLOSE_SUBEXP - && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx) - break; - } - if (clexp_idx == init_nodes.nelem) - continue; - - if (type == OP_BACK_REF) - { - int dest_idx = dfa->edests[node_idx].elems[0]; - if (!re_node_set_contains (&init_nodes, dest_idx)) - { - re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); - i = 0; - } - } + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); + i = 0; + } + } } /* It must be the first time to invoke acquire_state. 
*/ @@ -926,16 +928,16 @@ create_initial_state (dfa) if (dfa->init_state->has_constraint) { dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_WORD); + CONTEXT_WORD); dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_NEWLINE); + CONTEXT_NEWLINE); dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, - &init_nodes, - CONTEXT_NEWLINE - | CONTEXT_BEGBUF); + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) - return err; + || dfa->init_state_begbuf == NULL, 0)) + return err; } else dfa->init_state_word = dfa->init_state_nl @@ -961,7 +963,7 @@ analyze (dfa) dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc); if (BE (dfa->nexts == NULL || dfa->edests == NULL - || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0)) + || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0)) return REG_ESPACE; /* Initialize them. */ for (i = 0; i < dfa->nodes_len; ++i) @@ -1003,14 +1005,14 @@ analyze_tree (dfa, node) { ret = analyze_tree (dfa, node->left); if (BE (ret != REG_NOERROR, 0)) - return ret; + return ret; } /* Calculate "first" etc. for the right child. 
*/ if (node->right != NULL) { ret = analyze_tree (dfa, node->right); if (BE (ret != REG_NOERROR, 0)) - return ret; + return ret; } return REG_NOERROR; } @@ -1062,7 +1064,7 @@ calc_first (dfa, node) assert (node->left != NULL); #endif if (node->left->first == -1) - calc_first (dfa, node->left); + calc_first (dfa, node->left); node->first = node->left->first; break; case OP_ALT: @@ -1074,7 +1076,7 @@ calc_first (dfa, node) assert (node->left != NULL); #endif if (node->left->first == -1) - calc_first (dfa, node->left); + calc_first (dfa, node->left); node->first = node->left->first; break; } @@ -1094,7 +1096,7 @@ calc_next (dfa, node) node->next = -1; idx = node->node_idx; if (node->type == 0) - dfa->nexts[idx] = node->next; + dfa->nexts[idx] = node->next; return; } @@ -1109,16 +1111,16 @@ calc_next (dfa, node) break; case CONCAT: if (parent->left == node) - { - if (parent->right->first == -1) - calc_first (dfa, parent->right); - node->next = parent->right->first; - break; - } + { + if (parent->right->first == -1) + calc_first (dfa, parent->right); + node->next = parent->right->first; + break; + } /* else fall through */ default: if (parent->next == -1) - calc_next (dfa, parent); + calc_next (dfa, parent); node->next = parent->next; break; } @@ -1139,50 +1141,50 @@ calc_epsdest (dfa, node) if (node->type == 0) { if (dfa->nodes[idx].type == OP_DUP_ASTERISK - || dfa->nodes[idx].type == OP_DUP_PLUS - || dfa->nodes[idx].type == OP_DUP_QUESTION) - { - if (node->left->first == -1) - calc_first (dfa, node->left); - if (node->next == -1) - calc_next (dfa, node); - re_node_set_init_2 (dfa->edests + idx, node->left->first, - node->next); - } + || dfa->nodes[idx].type == OP_DUP_PLUS + || dfa->nodes[idx].type == OP_DUP_QUESTION) + { + if (node->left->first == -1) + calc_first (dfa, node->left); + if (node->next == -1) + calc_next (dfa, node); + re_node_set_init_2 (dfa->edests + idx, node->left->first, + node->next); + } else if (dfa->nodes[idx].type == OP_ALT) - { - int left, 
right; - if (node->left != NULL) - { - if (node->left->first == -1) - calc_first (dfa, node->left); - left = node->left->first; - } - else - { - if (node->next == -1) - calc_next (dfa, node); - left = node->next; - } - if (node->right != NULL) - { - if (node->right->first == -1) - calc_first (dfa, node->right); - right = node->right->first; - } - else - { - if (node->next == -1) - calc_next (dfa, node); - right = node->next; - } - re_node_set_init_2 (dfa->edests + idx, left, right); - } + { + int left, right; + if (node->left != NULL) + { + if (node->left->first == -1) + calc_first (dfa, node->left); + left = node->left->first; + } + else + { + if (node->next == -1) + calc_next (dfa, node); + left = node->next; + } + if (node->right != NULL) + { + if (node->right->first == -1) + calc_first (dfa, node->right); + right = node->right->first; + } + else + { + if (node->next == -1) + calc_next (dfa, node); + right = node->next; + } + re_node_set_init_2 (dfa->edests + idx, left, right); + } else if (dfa->nodes[idx].type == ANCHOR - || dfa->nodes[idx].type == OP_OPEN_SUBEXP - || dfa->nodes[idx].type == OP_CLOSE_SUBEXP - || dfa->nodes[idx].type == OP_BACK_REF) - re_node_set_init_1 (dfa->edests + idx, node->next); + || dfa->nodes[idx].type == OP_OPEN_SUBEXP + || dfa->nodes[idx].type == OP_CLOSE_SUBEXP + || dfa->nodes[idx].type == OP_BACK_REF) + re_node_set_init_1 (dfa->edests + idx, node->next); } } @@ -1192,7 +1194,7 @@ calc_epsdest (dfa, node) static reg_errcode_t duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, - init_constraint) + init_constraint) re_dfa_t *dfa; int top_org_node, top_clone_node, root_node; unsigned int init_constraint; @@ -1204,84 +1206,84 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, { int org_dest, clone_dest; if (dfa->nodes[org_node].type == OP_BACK_REF) - { + { /* If the back reference epsilon-transit, its destination must also have the constraint. Then duplicate the epsilon closure |
