From d7d64cdb3d3f8f7b52e6df68fff7c745c6700c98 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 19 Jun 2001 00:43:55 +0000 Subject: Update. 2001-06-17 Alan Modra * sysdeps/hppa/dl-machine.h (elf_machine_rela): handle relocs at unaligned address. --- ChangeLog | 5 + posix/regex.c | 2165 +++++++++++++++++++++++++-------------------- sysdeps/hppa/dl-machine.h | 11 +- 3 files changed, 1201 insertions(+), 980 deletions(-) diff --git a/ChangeLog b/ChangeLog index e3237306e0..3a2ab448bf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2001-06-17 Alan Modra + + * sysdeps/hppa/dl-machine.h (elf_machine_rela): handle relocs + at unaligned address. + 2001-06-18 Ulrich Drepper * posix/regex.c: Reorganize code to build code twice, once for single diff --git a/posix/regex.c b/posix/regex.c index 086de08084..20d8b71fc2 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -39,180 +39,158 @@ # endif /* GCC. */ #endif /* Not PARAMS. */ -#if defined STDC_HEADERS && !defined emacs -# include -#else +#ifndef INSIDE_RECURSION + +# if defined STDC_HEADERS && !defined emacs +# include +# else /* We need this for `regex.h', and perhaps for the Emacs include files. */ -# include -#endif +# include +# endif -#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) +# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) /* For platform which support the ISO C amendement 1 functionality we support user defined character classes. */ -#if defined _LIBC || WIDE_CHAR_SUPPORT +# if defined _LIBC || WIDE_CHAR_SUPPORT /* Solaris 2.5 has a bug: must be included before . */ -# include -# include -#endif - -/* This is for multi byte string support. */ -#ifdef MBS_SUPPORT -# define CHAR_TYPE wchar_t -# define US_CHAR_TYPE wchar_t/* unsigned character type */ -# define COMPILED_BUFFER_VAR wc_buffer -# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ -# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1) -# define PUT_CHAR(c) \ - do { \ - if (MB_CUR_MAX == 1) \ - putchar (c); \ - else \ - printf ("%C", (wint_t) c); /* Should we use wide stream?? */ \ - } while (0) -# define TRUE 1 -# define FALSE 0 -#else -# define CHAR_TYPE char -# define US_CHAR_TYPE unsigned char /* unsigned character type */ -# define COMPILED_BUFFER_VAR bufp->buffer -# define OFFSET_ADDRESS_SIZE 2 -# define PUT_CHAR(c) putchar (c) -#endif /* MBS_SUPPORT */ +# include +# include +# endif -#ifdef _LIBC +# ifdef _LIBC /* We have to keep the namespace clean. */ -# define regfree(preg) __regfree (preg) -# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) -# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) -# define regerror(errcode, preg, errbuf, errbuf_size) \ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ __regerror(errcode, preg, errbuf, errbuf_size) -# define re_set_registers(bu, re, nu, st, en) \ +# define re_set_registers(bu, re, nu, st, en) \ __re_set_registers (bu, re, nu, st, en) -# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) -# define re_match(bufp, string, size, pos, regs) \ +# define re_match(bufp, string, size, pos, regs) \ __re_match (bufp, string, size, pos, regs) -# define re_search(bufp, string, size, startpos, range, regs) \ +# define re_search(bufp, string, size, startpos, range, regs) \ __re_search (bufp, string, size, startpos, range, regs) -# define re_compile_pattern(pattern, length, bufp) \ +# define re_compile_pattern(pattern, length, bufp) \ __re_compile_pattern (pattern, length, bufp) -# define re_set_syntax(syntax) __re_set_syntax (syntax) -# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) -# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) -# define btowc __btowc +# define btowc __btowc /* We are also using some library internals. */ -# include -# include -# include -# include -#endif +# include +# include +# include +# include +# endif /* This is for other GNU distributions with internationalized messages. */ -#if HAVE_LIBINTL_H || defined _LIBC -# include -# ifdef _LIBC -# undef gettext -# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) +# if HAVE_LIBINTL_H || defined _LIBC +# include +# ifdef _LIBC +# undef gettext +# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) +# endif +# else +# define gettext(msgid) (msgid) # endif -#else -# define gettext(msgid) (msgid) -#endif -#ifndef gettext_noop +# ifndef gettext_noop /* This define is so xgettext can find the internationalizable strings. */ -# define gettext_noop(String) String -#endif +# define gettext_noop(String) String +# endif /* The `emacs' switch turns on certain matching commands that make sense only in Emacs. */ -#ifdef emacs +# ifdef emacs -# include "lisp.h" -# include "buffer.h" -# include "syntax.h" +# include "lisp.h" +# include "buffer.h" +# include "syntax.h" -#else /* not emacs */ +# else /* not emacs */ /* If we are not linking with Emacs proper, we can't use the relocating allocator even if config.h says that we can. */ -# undef REL_ALLOC +# undef REL_ALLOC -# if defined STDC_HEADERS || defined _LIBC -# include -# else +# if defined STDC_HEADERS || defined _LIBC +# include +# else char *malloc (); char *realloc (); -# endif +# endif /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. If nothing else has been done, use the method below. */ -# ifdef INHIBIT_STRING_HEADER -# if !(defined HAVE_BZERO && defined HAVE_BCOPY) -# if !defined bzero && !defined bcopy -# undef INHIBIT_STRING_HEADER +# ifdef INHIBIT_STRING_HEADER +# if !(defined HAVE_BZERO && defined HAVE_BCOPY) +# if !defined bzero && !defined bcopy +# undef INHIBIT_STRING_HEADER +# endif # endif # endif -# endif /* This is the normal way of making sure we have a bcopy and a bzero. This is used in most programs--a few other programs avoid this by defining INHIBIT_STRING_HEADER. */ -# ifndef INHIBIT_STRING_HEADER -# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC -# include -# ifndef bzero -# ifndef _LIBC -# define bzero(s, n) (memset (s, '\0', n), (s)) -# else -# define bzero(s, n) __bzero (s, n) +# ifndef INHIBIT_STRING_HEADER +# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC +# include +# ifndef bzero +# ifndef _LIBC +# define bzero(s, n) (memset (s, '\0', n), (s)) +# else +# define bzero(s, n) __bzero (s, n) +# endif +# endif +# else +# include +# ifndef memcmp +# define memcmp(s1, s2, n) bcmp (s1, s2, n) +# endif +# ifndef memcpy +# define memcpy(d, s, n) (bcopy (s, d, n), (d)) # endif -# endif -# else -# include -# ifndef memcmp -# define memcmp(s1, s2, n) bcmp (s1, s2, n) -# endif -# ifndef memcpy -# define memcpy(d, s, n) (bcopy (s, d, n), (d)) # endif # endif -# endif /* Define the syntax stuff for \<, \>, etc. */ /* This must be nonzero for the wordchar and notwordchar pattern commands in re_match_2. */ -# ifndef Sword -# define Sword 1 -# endif +# ifndef Sword +# define Sword 1 +# endif -# ifdef SWITCH_ENUM_BUG -# define SWITCH_ENUM_CAST(x) ((int)(x)) -# else -# define SWITCH_ENUM_CAST(x) (x) -# endif +# ifdef SWITCH_ENUM_BUG +# define SWITCH_ENUM_CAST(x) ((int)(x)) +# else +# define SWITCH_ENUM_CAST(x) (x) +# endif -#endif /* not emacs */ +# endif /* not emacs */ -#if defined _LIBC || HAVE_LIMITS_H -# include -#endif +# if defined _LIBC || HAVE_LIMITS_H +# include +# endif -#ifndef MB_LEN_MAX -# define MB_LEN_MAX 1 -#endif +# ifndef MB_LEN_MAX +# define MB_LEN_MAX 1 +# endif /* Get the interface, including the syntax bits. */ -#include +# include /* isalpha etc. are used for the character classes. */ -#include +# include /* Jim Meyering writes: @@ -226,67 +204,67 @@ char *realloc (); eliminate the && through constant folding." Solaris defines some of these symbols so we must undefine them first. */ -#undef ISASCII -#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define ISASCII(c) 1 -#else -# define ISASCII(c) isascii(c) -#endif +# undef ISASCII +# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) +# define ISASCII(c) 1 +# else +# define ISASCII(c) isascii(c) +# endif -#ifdef isblank -# define ISBLANK(c) (ISASCII (c) && isblank (c)) -#else -# define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -#else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -#endif +# ifdef isblank +# define ISBLANK(c) (ISASCII (c) && isblank (c)) +# else +# define ISBLANK(c) ((c) == ' ' || (c) == '\t') +# endif +# ifdef isgraph +# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +# else +# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +# endif -#undef ISPRINT -#define ISPRINT(c) (ISASCII (c) && isprint (c)) -#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -#define ISALNUM(c) (ISASCII (c) && isalnum (c)) -#define ISALPHA(c) (ISASCII (c) && isalpha (c)) -#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -#define ISLOWER(c) (ISASCII (c) && islower (c)) -#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -#define ISSPACE(c) (ISASCII (c) && isspace (c)) -#define ISUPPER(c) (ISASCII (c) && isupper (c)) -#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) - -#ifdef _tolower -# define TOLOWER(c) _tolower(c) -#else -# define TOLOWER(c) tolower(c) -#endif +# undef ISPRINT +# define ISPRINT(c) (ISASCII (c) && isprint (c)) +# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +# define ISALNUM(c) (ISASCII (c) && isalnum (c)) +# define ISALPHA(c) (ISASCII (c) && isalpha (c)) +# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +# define ISLOWER(c) (ISASCII (c) && islower (c)) +# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +# define ISSPACE(c) (ISASCII (c) && isspace (c)) +# define ISUPPER(c) (ISASCII (c) && isupper (c)) +# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +# ifdef _tolower +# define TOLOWER(c) _tolower(c) +# else +# define TOLOWER(c) tolower(c) +# endif -#ifndef NULL -# define NULL (void *)0 -#endif +# ifndef NULL +# define NULL (void *)0 +# endif /* We remove any previous definition of `SIGN_EXTEND_CHAR', since ours (we hope) works properly with all combinations of machines, compilers, `char' and `unsigned char' argument types. (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ +# undef SIGN_EXTEND_CHAR +# if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +# else /* not __STDC__ */ /* As in Harbison and Steele. */ -# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif +# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +# endif -#ifndef emacs +# ifndef emacs /* How many characters in the character set. */ -# define CHAR_SET_SIZE 256 +# define CHAR_SET_SIZE 256 -# ifdef SYNTAX_TABLE +# ifdef SYNTAX_TABLE extern char *re_syntax_table; -# else /* not SYNTAX_TABLE */ +# else /* not SYNTAX_TABLE */ static char re_syntax_table[CHAR_SET_SIZE]; @@ -311,12 +289,17 @@ init_syntax_once () done = 1; } -# endif /* not SYNTAX_TABLE */ +# endif /* not SYNTAX_TABLE */ -# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] +# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] -#endif /* emacs */ +# endif /* emacs */ +/* Integer type for pointers. */ +# if !defined _LIBC +typedef unsigned long int uintptr_t; +# endif + /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we use `alloca' instead of `malloc'. This is because using malloc in re_search* or re_match* could cause memory leaks when C-g is used in @@ -327,104 +310,130 @@ init_syntax_once () not functions -- `alloca'-allocated space disappears at the end of the function it is called in. */ -#ifdef REGEX_MALLOC +# ifdef REGEX_MALLOC -# define REGEX_ALLOCATE malloc -# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE free +# define REGEX_ALLOCATE malloc +# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE free -#else /* not REGEX_MALLOC */ +# else /* not REGEX_MALLOC */ /* Emacs already defines alloca, sometimes. */ -# ifndef alloca +# ifndef alloca /* Make alloca work the best possible way. */ -# ifdef __GNUC__ -# define alloca __builtin_alloca -# else /* not __GNUC__ */ -# if HAVE_ALLOCA_H -# include -# endif /* HAVE_ALLOCA_H */ -# endif /* not __GNUC__ */ +# ifdef __GNUC__ +# define alloca __builtin_alloca +# else /* not __GNUC__ */ +# if HAVE_ALLOCA_H +# include +# endif /* HAVE_ALLOCA_H */ +# endif /* not __GNUC__ */ -# endif /* not alloca */ +# endif /* not alloca */ -# define REGEX_ALLOCATE alloca +# define REGEX_ALLOCATE alloca /* Assumes a `char *destination' variable. */ -# define REGEX_REALLOCATE(source, osize, nsize) \ +# define REGEX_REALLOCATE(source, osize, nsize) \ (destination = (char *) alloca (nsize), \ memcpy (destination, source, osize)) /* No need to do anything to free, after alloca. */ -# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ +# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ -#endif /* not REGEX_MALLOC */ +# endif /* not REGEX_MALLOC */ /* Define how to allocate the failure stack. */ -#if defined REL_ALLOC && defined REGEX_MALLOC +# if defined REL_ALLOC && defined REGEX_MALLOC -# define REGEX_ALLOCATE_STACK(size) \ +# define REGEX_ALLOCATE_STACK(size) \ r_alloc (&failure_stack_ptr, (size)) -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ r_re_alloc (&failure_stack_ptr, (nsize)) -# define REGEX_FREE_STACK(ptr) \ +# define REGEX_FREE_STACK(ptr) \ r_alloc_free (&failure_stack_ptr) -#else /* not using relocating allocator */ +# else /* not using relocating allocator */ -# ifdef REGEX_MALLOC +# ifdef REGEX_MALLOC -# define REGEX_ALLOCATE_STACK malloc -# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE_STACK free +# define REGEX_ALLOCATE_STACK malloc +# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE_STACK free -# else /* not REGEX_MALLOC */ +# else /* not REGEX_MALLOC */ -# define REGEX_ALLOCATE_STACK alloca +# define REGEX_ALLOCATE_STACK alloca -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ REGEX_REALLOCATE (source, osize, nsize) /* No need to explicitly free anything. */ -# define REGEX_FREE_STACK(arg) +# define REGEX_FREE_STACK(arg) -# endif /* not REGEX_MALLOC */ -#endif /* not using relocating allocator */ +# endif /* not REGEX_MALLOC */ +# endif /* not using relocating allocator */ /* True if `size1' is non-NULL and PTR is pointing anywhere inside `string1' or just past its end. This works if PTR is NULL, which is a good thing. */ -#define FIRST_STRING_P(ptr) \ +# define FIRST_STRING_P(ptr) \ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) /* (Re)Allocate N items of type T using malloc, or fail. */ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define RETALLOC_IF(addr, n, t) \ +# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +# define RETALLOC_IF(addr, n, t) \ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) +# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) -#define BYTEWIDTH 8 /* In bits. */ +# define BYTEWIDTH 8 /* In bits. */ -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) +# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) -#undef MAX -#undef MIN -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) +# undef MAX +# undef MIN +# define MAX(a, b) ((a) > (b) ? (a) : (b)) +# define MIN(a, b) ((a) < (b) ? (a) : (b)) typedef char boolean; -#define false 0 -#define true 1 - -static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int pos, - struct re_registers *regs, - int stop)); +# define false 0 +# define true 1 + +static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); +static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); + +static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int pos, + struct re_registers *regs, + int stop)); +static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int pos, + struct re_registers *regs, + int stop)); +static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int startpos, int range, + struct re_registers *regs, int stop)); +static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int startpos, int range, + struct re_registers *regs, int stop)); +static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); +static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); + /* These are the command codes that appear in compiled regular expressions. Some opcodes are followed by argument bytes. A @@ -441,10 +450,10 @@ typedef enum /* Followed by one byte giving n, then by n literal bytes. */ exactn, -#ifdef MBS_SUPPORT +# ifdef MBS_SUPPORT /* Same as exactn, but contains binary data. */ exactn_bin, -#endif +# endif /* Matches any (more or less) character. */ anychar, @@ -570,7 +579,7 @@ typedef enum wordbound, /* Succeeds if at a word boundary. */ notwordbound /* Succeeds if not at a word boundary. */ -#ifdef emacs +# ifdef emacs ,before_dot, /* Succeeds if before point. */ at_dot, /* Succeeds if at point. */ after_dot, /* Succeeds if after point. */ @@ -581,33 +590,69 @@ typedef enum /* Matches any character whose syntax is not that specified. */ notsyntaxspec -#endif /* emacs */ +# endif /* emacs */ } re_opcode_t; +#endif /* not INSIDE_RECURSION */ + +#ifdef BYTE +# define CHAR_T char +# define UCHAR_T unsigned char +# define COMPILED_BUFFER_VAR bufp->buffer +# define OFFSET_ADDRESS_SIZE 2 +# define PREFIX(name) byte_##name +# define ARG_PREFIX(name) name +# define PUT_CHAR(c) putchar (c) +#elif defined WCHAR +# define CHAR_T wchar_t +# define UCHAR_T wchar_t +# define COMPILED_BUFFER_VAR wc_buffer +# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ +# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) +# define PREFIX(name) wcs_##name +# define ARG_PREFIX(name) c##name +/* Should we use wide stream?? */ +# define PUT_CHAR(c) printf ("%C", c); +# define TRUE 1 +# define FALSE 0 +#else +# ifdef MBS_SUPPORT +# define WCHAR +# define INSIDE_RECURSION +# include "regex.c" +# undef INSIDE_RECURSION +# endif +# define BYTE +# define INSIDE_RECURSION +# include "regex.c" +# undef INSIDE_RECURSION +#endif + +#ifdef INSIDE_RECURSION /* Common operations on the compiled pattern. */ /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ -#ifdef MBS_SUPPORT -# define STORE_NUMBER(destination, number) \ +# ifdef WCHAR +# define STORE_NUMBER(destination, number) \ do { \ - *(destination) = (US_CHAR_TYPE)(number); \ + *(destination) = (UCHAR_T)(number); \ } while (0) -#else -# define STORE_NUMBER(destination, number) \ +# else /* BYTE */ +# define STORE_NUMBER(destination, number) \ do { \ (destination)[0] = (number) & 0377; \ (destination)[1] = (number) >> 8; \ } while (0) -#endif /* MBS_SUPPORT */ +# endif /* WCHAR */ /* Same as STORE_NUMBER, except increment DESTINATION to the byte after where the number is stored. Therefore, DESTINATION must be an lvalue. */ /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ -#define STORE_NUMBER_AND_INCR(destination, number) \ +# define STORE_NUMBER_AND_INCR(destination, number) \ do { \ STORE_NUMBER (destination, number); \ (destination) += OFFSET_ADDRESS_SIZE; \ @@ -617,100 +662,107 @@ typedef enum at SOURCE. */ /* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ -#ifdef MBS_SUPPORT -# define EXTRACT_NUMBER(destination, source) \ +# ifdef WCHAR +# define EXTRACT_NUMBER(destination, source) \ do { \ (destination) = *(source); \ } while (0) -#else -# define EXTRACT_NUMBER(destination, source) \ +# else /* BYTE */ +# define EXTRACT_NUMBER(destination, source) \ do { \ (destination) = *(source) & 0377; \ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ } while (0) -#endif +# endif -#ifdef DEBUG -static void extract_number _RE_ARGS ((int *dest, US_CHAR_TYPE *source)); +# ifdef DEBUG +static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source)); static void -extract_number (dest, source) +PREFIX(extract_number) (dest, source) int *dest; - US_CHAR_TYPE *source; + UCHAR_T *source; { -#ifdef MBS_SUPPORT +# ifdef WCHAR *dest = *source; -#else +# else /* BYTE */ int temp = SIGN_EXTEND_CHAR (*(source + 1)); *dest = *source & 0377; *dest += temp << 8; -#endif +# endif } -# ifndef EXTRACT_MACROS /* To debug the macros. */ -# undef EXTRACT_NUMBER -# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -# endif /* not EXTRACT_MACROS */ +# ifndef EXTRACT_MACROS /* To debug the macros. */ +# undef EXTRACT_NUMBER +# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) +# endif /* not EXTRACT_MACROS */ -#endif /* DEBUG */ +# endif /* DEBUG */ /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. SOURCE must be an lvalue. */ -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ +# define EXTRACT_NUMBER_AND_INCR(destination, source) \ do { \ EXTRACT_NUMBER (destination, source); \ (source) += OFFSET_ADDRESS_SIZE; \ } while (0) -#ifdef DEBUG -static void extract_number_and_incr _RE_ARGS ((int *destination, - US_CHAR_TYPE **source)); +# ifdef DEBUG +static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination, + UCHAR_T **source)); static void -extract_number_and_incr (destination, source) +PREFIX(extract_number_and_incr) (destination, source) int *destination; - US_CHAR_TYPE **source; + UCHAR_T **source; { - extract_number (destination, *source); + PREFIX(extract_number) (destination, *source); *source += OFFSET_ADDRESS_SIZE; } -# ifndef EXTRACT_MACROS -# undef EXTRACT_NUMBER_AND_INCR -# define EXTRACT_NUMBER_AND_INCR(dest, src) \ - extract_number_and_incr (&dest, &src) -# endif /* not EXTRACT_MACROS */ +# ifndef EXTRACT_MACROS +# undef EXTRACT_NUMBER_AND_INCR +# define EXTRACT_NUMBER_AND_INCR(dest, src) \ + PREFIX(extract_number_and_incr) (&dest, &src) +# endif /* not EXTRACT_MACROS */ + +# endif /* DEBUG */ -#endif /* DEBUG */ + /* If DEBUG is defined, Regex prints many voluminous messages about what it is doing (if the variable `debug' is nonzero). If linked with the main program in `iregex.c', you can enter patterns and strings interactively. And if linked with the main program in `main.c' and the other test files, you can run the already-written tests. */ -#ifdef DEBUG +# ifdef DEBUG + +# ifndef DEFINED_ONCE /* We use standard I/O for debugging. */ -# include +# include /* It is useful to test things that ``must'' be true when debugging. */ -# include +# include static int debug; -# define DEBUG_STATEMENT(e) e -# define DEBUG_PRINT1(x) if (debug) printf (x) -# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) -# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) +# define DEBUG_STATEMENT(e) e +# define DEBUG_PRINT1(x) if (debug) printf (x) +# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +# endif /* not DEFINED_ONCE */ + +# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) PREFIX(print_partial_compiled_pattern) (s, e) +# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) /* Print the fastmap in human-readable form. */ +# ifndef DEFINED_ONCE void print_fastmap (fastmap) char *fastmap; @@ -738,20 +790,21 @@ print_fastmap (fastmap) } putchar ('\n'); } +# endif /* not DEFINED_ONCE */ /* Print a compiled pattern string in human-readable form, starting at the START pointer into it and ending just before the pointer END. */ void -print_partial_compiled_pattern (start, end) - US_CHAR_TYPE *start; - US_CHAR_TYPE *end; +PREFIX(print_partial_compiled_pattern) (start, end) + UCHAR_T *start; + UCHAR_T *end; { int mcnt, mcnt2; - US_CHAR_TYPE *p1; - US_CHAR_TYPE *p = start; - US_CHAR_TYPE *pend = end; + UCHAR_T *p1; + UCHAR_T *p = start; + UCHAR_T *pend = end; if (start == NULL) { @@ -762,11 +815,11 @@ print_partial_compiled_pattern (start, end) /* Loop over pattern commands. */ while (p < pend) { -#ifdef _LIBC +# ifdef _LIBC printf ("%td:\t", p - start); -#else +# else printf ("%ld:\t", (long int) (p - start)); -#endif +# endif switch ((re_opcode_t) *p++) { @@ -785,7 +838,7 @@ print_partial_compiled_pattern (start, end) while (--mcnt); break; -#ifdef MBS_SUPPORT +# ifdef MBS_SUPPORT case exactn_bin: mcnt = *p++; printf ("/exactn_bin/%d", mcnt); @@ -795,7 +848,7 @@ print_partial_compiled_pattern (start, end) } while (--mcnt); break; -#endif /* MBS_SUPPORT */ +# endif /* MBS_SUPPORT */ case start_memory: mcnt = *p++; @@ -818,7 +871,7 @@ print_partial_compiled_pattern (start, end) case charset: case charset_not: { -#ifdef MBS_SUPPORT +# ifdef WCHAR int i, length; wchar_t *workp = p; printf ("/charset [%s", @@ -850,19 +903,13 @@ print_partial_compiled_pattern (start, end) { wchar_t range_start = *p++; wchar_t range_end = *p++; - if (MB_CUR_MAX == 1) - printf("%c-%c", (char) range_start, (char) range_end); - else - printf("%C-%C", (wint_t) range_start, (wint_t) range_end); + printf("%C-%C", range_start, range_end); } length = *workp++; /* the length of char */ for (i=0 ; ibuffer; + UCHAR_T *buffer = (UCHAR_T*) bufp->buffer; - print_partial_compiled_pattern (buffer, buffer - + bufp->used / sizeof(US_CHAR_TYPE)); + PREFIX(print_partial_compiled_pattern) (buffer, buffer + + bufp->used / sizeof(UCHAR_T)); printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated); @@ -1101,11 +1148,11 @@ print_compiled_pattern (bufp) print_fastmap (bufp->fastmap); } -#ifdef _LIBC +# ifdef _LIBC printf ("re_nsub: %Zd\t", bufp->re_nsub); -#else +# else printf ("re_nsub: %ld\t", (long int) bufp->re_nsub); -#endif +# endif printf ("regs_alloc: %d\t", bufp->regs_allocated); printf ("can_be_null: %d\t", bufp->can_be_null); printf ("newline_anchor: %d\n", bufp->newline_anchor); @@ -1118,10 +1165,10 @@ print_compiled_pattern (bufp) void -print_double_string (where, string1, size1, string2, size2) - const CHAR_TYPE *where; - const CHAR_TYPE *string1; - const CHAR_TYPE *string2; +PREFIX(print_double_string) (where, string1, size1, string2, size2) + const CHAR_T *where; + const CHAR_T *string1; + const CHAR_T *string2; int size1; int size2; { @@ -1144,29 +1191,35 @@ print_double_string (where, string1, size1, string2, size2) } } +# ifndef DEFINED_ONCE void printchar (c) int c; { putc (c, stderr); } +# endif + +# else /* not DEBUG */ -#else /* not DEBUG */ +# ifndef DEFINED_ONCE +# undef assert +# define assert(e) -# undef assert -# define assert(e) +# define DEBUG_STATEMENT(e) +# define DEBUG_PRINT1(x) +# define DEBUG_PRINT2(x1, x2) +# define DEBUG_PRINT3(x1, x2, x3) +# define DEBUG_PRINT4(x1, x2, x3, x4) +# endif /* not DEFINED_ONCE */ +# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) -# define DEBUG_STATEMENT(e) -# define DEBUG_PRINT1(x) -# define DEBUG_PRINT2(x1, x2) -# define DEBUG_PRINT3(x1, x2, x3) -# define DEBUG_PRINT4(x1, x2, x3, x4) -# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) +# endif /* not DEBUG */ -#endif /* not DEBUG */ -#ifdef MBS_SUPPORT + +# ifdef WCHAR /* This convert a multibyte string to a wide character string. And write their correspondances to offset_buffer(see below) and write whether each wchar_t is binary data to is_binary. @@ -1174,12 +1227,12 @@ printchar (c) We assume offset_buffer and is_binary is already allocated enough space. */ -static size_t convert_mbs_to_wcs (CHAR_TYPE *dest, const unsigned char* src, +static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src, size_t len, int *offset_buffer, char *is_binary); static size_t convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary) - CHAR_TYPE *dest; + CHAR_T *dest; const unsigned char* src; size_t len; /* the length of multibyte string. */ @@ -1199,58 +1252,46 @@ convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary) const unsigned char *psrc = src; size_t wc_count = 0; - if (MB_CUR_MAX == 1) - { /* We don't need conversion. */ - for ( ; wc_count < len ; ++wc_count) - { - *pdest++ = *psrc++; - is_binary[wc_count] = FALSE; - offset_buffer[wc_count] = wc_count; - } - offset_buffer[wc_count] = wc_count; - } - else + mbstate_t mbs; + int consumed; + size_t mb_remain = len; + size_t mb_count = 0; + + /* Initialize the conversion state. */ + memset (&mbs, 0, sizeof (mbstate_t)); + + offset_buffer[0] = 0; + for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, + psrc += consumed) { - /* We need conversion. */ - mbstate_t mbs; - int consumed; - size_t mb_remain = len; - size_t mb_count = 0; - - /* Initialize the conversion state. */ - memset (&mbs, 0, sizeof (mbstate_t)); - - offset_buffer[0] = 0; - for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, - psrc += consumed) - { - consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); + consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); - if (consumed <= 0) - /* failed to convert. maybe src contains binary data. - So we consume 1 byte manualy. */ - { - *pdest = *psrc; - consumed = 1; - is_binary[wc_count] = TRUE; - } - else - is_binary[wc_count] = FALSE; - /* In sjis encoding, we use yen sign as escape character in - place of reverse solidus. So we convert 0x5c(yen sign in - sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse - solidus in UCS2). */ - if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) - *pdest = (wchar_t) *psrc; - - offset_buffer[wc_count + 1] = mb_count += consumed; + if (consumed <= 0) + /* failed to convert. maybe src contains binary data. + So we consume 1 byte manualy. */ + { + *pdest = *psrc; + consumed = 1; + is_binary[wc_count] = TRUE; } + else + is_binary[wc_count] = FALSE; + /* In sjis encoding, we use yen sign as escape character in + place of reverse solidus. So we convert 0x5c(yen sign in + sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse + solidus in UCS2). */ + if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) + *pdest = (wchar_t) *psrc; + + offset_buffer[wc_count + 1] = mb_count += consumed; } return wc_count; } -#endif /* MBS_SUPPORT */ +# endif /* WCHAR */ + +#else /* not INSIDE_RECURSION */ /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can also be assigned to arbitrarily: each pattern buffer stores its own @@ -1274,17 +1315,17 @@ re_set_syntax (syntax) reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax; -#ifdef DEBUG +# ifdef DEBUG if (syntax & RE_DEBUG) debug = 1; else if (debug) /* was on but now is not */ debug = 0; -#endif /* DEBUG */ +# endif /* DEBUG */ return ret; } -#ifdef _LIBC +# ifdef _LIBC weak_alias (__re_set_syntax, re_set_syntax) -#endif +# endif /* This table gives an error message for each of the error codes listed in regex.h. Obviously the order here has to be same as there. @@ -1293,55 +1334,55 @@ weak_alias (__re_set_syntax, re_set_syntax) static const char re_error_msgid[] = { -#define REG_NOERROR_IDX 0 +# define REG_NOERROR_IDX 0 gettext_noop ("Success") /* REG_NOERROR */ "\0" -#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") +# define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") gettext_noop ("No match") /* REG_NOMATCH */ "\0" -#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") +# define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") gettext_noop ("Invalid regular expression") /* REG_BADPAT */ "\0" -#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") +# define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ "\0" -#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") +# define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") gettext_noop ("Invalid character class name") /* REG_ECTYPE */ "\0" -#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") +# define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") gettext_noop ("Trailing backslash") /* REG_EESCAPE */ "\0" -#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") +# define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") gettext_noop ("Invalid back reference") /* REG_ESUBREG */ "\0" -#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") +# define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ "\0" -#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") +# define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ "\0" -#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") +# define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") gettext_noop ("Unmatched \\{") /* REG_EBRACE */ "\0" -#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") +# define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ "\0" -#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") +# define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") gettext_noop ("Invalid range end") /* REG_ERANGE */ "\0" -#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") +# define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") gettext_noop ("Memory exhausted") /* REG_ESPACE */ "\0" -#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") +# define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ "\0" -#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") +# define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") gettext_noop ("Premature end of regular expression") /* REG_EEND */ "\0" -#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") +# define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") gettext_noop ("Regular expression too big") /* REG_ESIZE */ "\0" -#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") +# define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ }; @@ -1366,6 +1407,9 @@ static const size_t re_error_msgid_idx[] = REG_ERPAREN_IDX }; +#endif /* INSIDE_RECURSION */ + +#ifndef DEFINED_ONCE /* Avoiding alloca during matching, to placate r_alloc. */ /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the @@ -1386,24 +1430,25 @@ static const size_t re_error_msgid_idx[] = faith that they will not malloc. */ /* Normally, this is fine. */ -#define MATCH_MAY_ALLOCATE +# define MATCH_MAY_ALLOCATE /* When using GNU C, we are not REALLY using the C alloca, no matter what config.h may say. So don't take precautions for it. */ -#ifdef __GNUC__ -# undef C_ALLOCA -#endif +# ifdef __GNUC__ +# undef C_ALLOCA +# endif /* The match routines may not allocate if (1) they would do it with malloc and (2) it's not safe for them to use malloc. Note that if REL_ALLOC is defined, matching would not use malloc for the failure stack, but we would still use it for the register vectors; so REL_ALLOC should not affect this. */ -#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs -# undef MATCH_MAY_ALLOCATE -#endif - +# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs +# undef MATCH_MAY_ALLOCATE +# endif +#endif /* not DEFINED_ONCE */ +#ifdef INSIDE_RECURSION /* Failure stack declarations and macros; both re_compile_fastmap and re_match_2 use a failure stack. These have to be macros because of REGEX_ALLOCATE_STACK. */ @@ -1412,97 +1457,103 @@ static const size_t re_error_msgid_idx[] = /* Number of failure points for which to initially allocate space when matching. If this number is exceeded, we allocate more space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -# define INIT_FAILURE_ALLOC 5 -#endif +# ifndef INIT_FAILURE_ALLOC +# define INIT_FAILURE_ALLOC 5 +# endif /* Roughly the maximum number of failure points on the stack. Would be exactly that if always used MAX_FAILURE_ITEMS items each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */ -#ifdef INT_IS_16BIT +# ifdef INT_IS_16BIT -# if defined MATCH_MAY_ALLOCATE +# ifndef DEFINED_ONCE +# if defined MATCH_MAY_ALLOCATE /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ long int re_max_failures = 4000; -# else +# else long int re_max_failures = 2000; -# endif +# endif +# endif -union fail_stack_elt +union PREFIX(fail_stack_elt) { - US_CHAR_TYPE *pointer; + UCHAR_T *pointer; long int integer; }; -typedef union fail_stack_elt fail_stack_elt_t; +typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); typedef struct { - fail_stack_elt_t *stack; + PREFIX(fail_stack_elt_t) *stack; unsigned long int size; unsigned long int avail; /* Offset of next open position. */ -} fail_stack_type; +} PREFIX(fail_stack_type); -#else /* not INT_IS_16BIT */ +# else /* not INT_IS_16BIT */ -# if defined MATCH_MAY_ALLOCATE +# ifndef DEFINED_ONCE +# if defined MATCH_MAY_ALLOCATE /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ int re_max_failures = 4000; -# else +# else int re_max_failures = 2000; -# endif +# endif +# endif -union fail_stack_elt +union PREFIX(fail_stack_elt) { - US_CHAR_TYPE *pointer; + UCHAR_T *pointer; int integer; }; -typedef union fail_stack_elt fail_stack_elt_t; +typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); typedef struct { - fail_stack_elt_t *stack; + PREFIX(fail_stack_elt_t) *stack; unsigned size; unsigned avail; /* Offset of next open position. */ -} fail_stack_type; +} PREFIX(fail_stack_type); -#endif /* INT_IS_16BIT */ +# endif /* INT_IS_16BIT */ -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) +# ifndef DEFINED_ONCE +# define FAIL_STACK_EMPTY() (fail_stack.avail == 0) +# define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +# define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) +# endif /* Define macros to initialize and free the failure stack. Do `return -2' if the alloc fails. */ -#ifdef MATCH_MAY_ALLOCATE -# define INIT_FAIL_STACK() \ +# ifdef MATCH_MAY_ALLOCATE +# define INIT_FAIL_STACK() \ do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \ + REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \ \ - if (fail_stack.stack == NULL) \ + if (fail_stack.stack == NULL) \ return -2; \ \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ + fail_stack.size = INIT_FAILURE_ALLOC; \ + fail_stack.avail = 0; \ } while (0) -# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) -#else -# define INIT_FAIL_STACK() \ +# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) +# else +# define INIT_FAIL_STACK() \ do { \ - fail_stack.avail = 0; \ + fail_stack.avail = 0; \ } while (0) -# define RESET_FAIL_STACK() -#endif +# define RESET_FAIL_STACK() +# endif /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. @@ -1512,13 +1563,13 @@ typedef struct REGEX_REALLOCATE_STACK requires `destination' be declared. */ -#define DOUBLE_FAIL_STACK(fail_stack) \ +# define DOUBLE_FAIL_STACK(fail_stack) \ ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ + : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \ REGEX_REALLOCATE_STACK ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \ + ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\ \ (fail_stack).stack == NULL \ ? 0 \ @@ -1529,7 +1580,7 @@ typedef struct /* Push pointer POINTER on FAIL_STACK. Return 1 if was able to do so and 0 if ran out of memory allocating space to do so. */ -#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ +# define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ ((FAIL_STACK_FULL () \ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ ? 0 \ @@ -1539,35 +1590,35 @@ typedef struct /* Push a pointer value onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_POINTER(item) \ - fail_stack.stack[fail_stack.avail++].pointer = (US_CHAR_TYPE *) (item) +# define PUSH_FAILURE_POINTER(item) \ + fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item) /* This pushes an integer-valued item onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_INT(item) \ +# define PUSH_FAILURE_INT(item) \ fail_stack.stack[fail_stack.avail++].integer = (item) /* Push a fail_stack_elt_t value onto the failure stack. Assumes the variable `fail_stack'. Probably should only be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ELT(item) \ +# define PUSH_FAILURE_ELT(item) \ fail_stack.stack[fail_stack.avail++] = (item) /* These three POP... operations complement the three PUSH... operations. All assume that `fail_stack' is nonempty. */ -#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer -#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer -#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] +# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer +# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer +# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] /* Used to omit pushing failure point id's when we're not debugging. */ -#ifdef DEBUG -# define DEBUG_PUSH PUSH_FAILURE_INT -# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () -#else -# define DEBUG_PUSH(item) -# define DEBUG_POP(item_addr) -#endif +# ifdef DEBUG +# define DEBUG_PUSH PUSH_FAILURE_INT +# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () +# else +# define DEBUG_PUSH(item) +# define DEBUG_POP(item_addr) +# endif /* Push the information about the state we will need @@ -1579,7 +1630,7 @@ typedef struct Does `return FAILURE_CODE' if runs out of memory. */ -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ +# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ do { \ char *destination; \ /* Must be int, so when we don't save any registers, the arithmetic \ @@ -1658,32 +1709,34 @@ typedef struct DEBUG_PUSH (failure_id); \ } while (0) +# ifndef DEFINED_ONCE /* This is the number of items that are pushed and popped on the stack for each register. */ -#define NUM_REG_ITEMS 3 +# define NUM_REG_ITEMS 3 /* Individual items aside from the registers. */ -#ifdef DEBUG -# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -#else -# define NUM_NONREG_ITEMS 4 -#endif +# ifdef DEBUG +# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +# else +# define NUM_NONREG_ITEMS 4 +# endif /* We push at most this many items on the stack. */ /* We used to use (num_regs - 1), which is the number of registers this regexp will save; but that was changed to 5 to avoid stack overflow for a regexp with lots of parens. */ -#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) +# define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) /* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ +# define NUM_FAILURE_ITEMS \ (((0 \ ? 0 : highest_active_reg - lowest_active_reg + 1) \ * NUM_REG_ITEMS) \ + NUM_NONREG_ITEMS) /* How many items can still be added to the stack without overflowing it. */ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) +# define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) +# endif /* not DEFINED_ONCE */ /* Pops what PUSH_FAIL_STACK pushes. @@ -1697,11 +1750,11 @@ typedef struct Also assumes the variables `fail_stack' and (if debugging), `bufp', `pend', `string1', `size1', `string2', and `size2'. */ -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ { \ DEBUG_STATEMENT (unsigned failure_id;) \ active_reg_t this_reg; \ - const US_CHAR_TYPE *string_temp; \ + const UCHAR_T *string_temp; \ \ assert (!FAIL_STACK_EMPTY ()); \ \ @@ -1720,13 +1773,13 @@ typedef struct saved NULL, thus retaining our current position in the string. */ \ string_temp = POP_FAILURE_POINTER (); \ if (string_temp != NULL) \ - str = (const CHAR_TYPE *) string_temp; \ + str = (const CHAR_T *) string_temp; \ \ DEBUG_PRINT2 (" Popping string %p: `", str); \ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ DEBUG_PRINT1 ("'\n"); \ \ - pat = (US_CHAR_TYPE *) POP_FAILURE_POINTER (); \ + pat = (UCHAR_T *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ \ @@ -1746,10 +1799,10 @@ typedef struct DEBUG_PRINT2 (" info: %p\n", \ reg_info[this_reg].word.pointer); \ \ - regend[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER (); \ + regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ \ - regstart[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER ();\ + regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ } \ else \ @@ -1766,7 +1819,6 @@ typedef struct set_regs_matched_done = 0; \ DEBUG_STATEMENT (nfailure_points_popped++); \ } /* POP_FAILURE_POINT */ - /* Structure for per-register (a.k.a. per-group) information. Other register information, such as the @@ -1784,29 +1836,30 @@ typedef struct typedef union { - fail_stack_elt_t word; + PREFIX(fail_stack_elt_t) word; struct { /* This field is one if this group can match the empty string, zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 +# define MATCH_NULL_UNSET_VALUE 3 unsigned match_null_string_p : 2; unsigned is_active : 1; unsigned matched_something : 1; unsigned ever_matched_something : 1; } bits; -} register_info_type; +} PREFIX(register_info_type); -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) +# ifndef DEFINED_ONCE +# define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +# define IS_ACTIVE(R) ((R).bits.is_active) +# define MATCHED_SOMETHING(R) ((R).bits.matched_something) +# define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) /* Call this when have matched a real character; it sets `matched' flags for the subexpressions which we are currently inside. Also records that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ +# define SET_REGS_MATCHED() \ do \ { \ if (!set_regs_matched_done) \ @@ -1822,47 +1875,44 @@ typedef union } \ } \ while (0) +# endif /* not DEFINED_ONCE */ /* Registers are set to a sentinel when they haven't yet matched. */ -static CHAR_TYPE reg_unset_dummy; -#define REG_UNSET_VALUE (®_unset_dummy) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - -/* Subroutine declarations and macros for regex_compile. */ +static CHAR_T PREFIX(reg_unset_dummy); +# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy)) +# define REG_UNSET(e) ((e) == REG_UNSET_VALUE) -static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, - reg_syntax_t syntax, - struct re_pattern_buffer *bufp)); -static void store_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc, int arg)); -static void store_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc, +/* Subroutine declarations and macros for regex_compile. */ +static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg)); +static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)); -static void insert_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc, - int arg, US_CHAR_TYPE *end)); -static void insert_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc, - int arg1, int arg2, US_CHAR_TYPE *end)); -static boolean at_begline_loc_p _RE_ARGS ((const CHAR_TYPE *pattern, - const CHAR_TYPE *p, +static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, + int arg, UCHAR_T *end)); +static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, + int arg1, int arg2, UCHAR_T *end)); +static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern, + const CHAR_T *p, reg_syntax_t syntax)); -static boolean at_endline_loc_p _RE_ARGS ((const CHAR_TYPE *p, - const CHAR_TYPE *pend, +static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p, + const CHAR_T *pend, reg_syntax_t syntax)); -#ifdef MBS_SUPPORT -static reg_errcode_t compile_range _RE_ARGS ((CHAR_TYPE range_start, - const CHAR_TYPE **p_ptr, - const CHAR_TYPE *pend, - char *translate, - reg_syntax_t syntax, - US_CHAR_TYPE *b, - CHAR_TYPE *char_set)); -static void insert_space _RE_ARGS ((int num, CHAR_TYPE *loc, CHAR_TYPE *end)); -#else -static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start, - const CHAR_TYPE **p_ptr, - const CHAR_TYPE *pend, - char *translate, - reg_syntax_t syntax, - US_CHAR_TYPE *b)); -#endif /* MBS_SUPPORT */ +# ifdef WCHAR +static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start, + const CHAR_T **p_ptr, + const CHAR_T *pend, + char *translate, + reg_syntax_t syntax, + UCHAR_T *b, + CHAR_T *char_set)); +static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end)); +# else /* BYTE */ +static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start, + const char **p_ptr, + const char *pend, + char *translate, + reg_syntax_t syntax, + unsigned char *b)); +# endif /* WCHAR */ /* Fetch the next character in the uncompiled pattern---translating it if necessary. Also cast from a signed character in the constant @@ -1871,31 +1921,31 @@ static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start, /* ifdef MBS_SUPPORT, we translate only if character <= 0xff, because it is impossible to allocate 4GB array for some encodings which have 4 byte character_set like UCS4. */ -#ifndef PATFETCH -# ifdef MBS_SUPPORT -# define PATFETCH(c) \ +# ifndef PATFETCH +# ifdef WCHAR +# define PATFETCH(c) \ do {if (p == pend) return REG_EEND; \ - c = (US_CHAR_TYPE) *p++; \ - if (translate && (c <= 0xff)) c = (US_CHAR_TYPE) translate[c]; \ + c = (UCHAR_T) *p++; \ + if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \ } while (0) -# else -# define PATFETCH(c) \ +# else /* BYTE */ +# define PATFETCH(c) \ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ if (translate) c = (unsigned char) translate[c]; \ } while (0) -# endif /* MBS_SUPPORT */ -#endif +# endif /* WCHAR */ +# endif /* Fetch the next character in the uncompiled pattern, with no translation. */ -#define PATFETCH_RAW(c) \ +# define PATFETCH_RAW(c) \ do {if (p == pend) return REG_EEND; \ - c = (US_CHAR_TYPE) *p++; \ + c = (UCHAR_T) *p++; \ } while (0) /* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- +# define PATUNFETCH p-- /* If `translate' is non-null, return translate[D], else just D. We @@ -1905,80 +1955,80 @@ static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start, /* ifdef MBS_SUPPORT, we translate only if character <= 0xff, because it is impossible to allocate 4GB array for some encodings which have 4 byte character_set like UCS4. */ -#ifndef TRANSLATE -# ifdef MBS_SUPPORT -# define TRANSLATE(d) \ - ((translate && ((US_CHAR_TYPE) (d)) <= 0xff) \ + +# ifndef TRANSLATE +# ifdef WCHAR +# define TRANSLATE(d) \ + ((translate && ((UCHAR_T) (d)) <= 0xff) \ ? (char) translate[(unsigned char) (d)] : (d)) -#else -# define TRANSLATE(d) \ +# else /* BYTE */ +# define TRANSLATE(d) \ (translate ? (char) translate[(unsigned char) (d)] : (d)) -# endif /* MBS_SUPPORT */ -#endif +# endif /* WCHAR */ +# endif /* Macros for outputting the compiled pattern into `buffer'. */ /* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE (32 * sizeof(US_CHAR_TYPE)) +# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T)) /* Make sure we have at least N more bytes of space in buffer. */ -#ifdef MBS_SUPPORT -# define GET_BUFFER_SPACE(n) \ +# ifdef WCHAR +# define GET_BUFFER_SPACE(n) \ while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \ - + (n)*sizeof(CHAR_TYPE)) > bufp->allocated) \ + + (n)*size