diff options
Diffstat (limited to 'stdio-common/vfscanf.c')
| -rw-r--r-- | stdio-common/vfscanf.c | 3042 |
1 files changed, 1 insertions, 3041 deletions
diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c index 1ce836a324..5eedca8340 100644 --- a/stdio-common/vfscanf.c +++ b/stdio-common/vfscanf.c @@ -15,3053 +15,13 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <assert.h> -#include <errno.h> -#include <limits.h> -#include <ctype.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <wchar.h> -#include <wctype.h> -#include <libc-diag.h> -#include <libc-lock.h> -#include <locale/localeinfo.h> -#include <scratch_buffer.h> - -#ifdef __GNUC__ -# define HAVE_LONGLONG -# define LONGLONG long long -#else -# define LONGLONG long -#endif - -/* Determine whether we have to handle `long long' at all. */ -#if LONG_MAX == LONG_LONG_MAX -# define need_longlong 0 -#else -# define need_longlong 1 -#endif - -/* Determine whether we have to handle `long'. */ -#if INT_MAX == LONG_MAX -# define need_long 0 -#else -# define need_long 1 -#endif - -/* Those are flags in the conversion format. */ -#define LONG 0x0001 /* l: long or double */ -#define LONGDBL 0x0002 /* L: long long or long double */ -#define SHORT 0x0004 /* h: short */ -#define SUPPRESS 0x0008 /* *: suppress assignment */ -#define POINTER 0x0010 /* weird %p pointer (`fake hex') */ -#define NOSKIP 0x0020 /* do not skip blanks */ -#define NUMBER_SIGNED 0x0040 /* signed integer */ -#define GROUP 0x0080 /* ': group numbers */ -#define GNU_MALLOC 0x0100 /* a: malloc strings */ -#define CHAR 0x0200 /* hh: char */ -#define I18N 0x0400 /* I: use locale's digits */ -#define HEXA_FLOAT 0x0800 /* hexadecimal float */ -#define READ_POINTER 0x1000 /* this is a pointer value */ -#define POSIX_MALLOC 0x2000 /* m: malloc strings */ -#define MALLOC (GNU_MALLOC | POSIX_MALLOC) - -#include <locale/localeinfo.h> #include <libioP.h> -#ifdef COMPILE_WSCANF -# define ungetc(c, s) ((void) (c == WEOF \ - || (--read_in, \ - _IO_sputbackwc (s, c)))) -# define ungetc_not_eof(c, s) ((void) (--read_in, \ - _IO_sputbackwc (s, c))) -# define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \ - : ((c = _IO_getwc_unlocked (s)), \ - (void) (c != WEOF \ - ? ++read_in \ - : (size_t) (inchar_errno = errno)), c)) - -# define ISSPACE(Ch) iswspace (Ch) -# define ISDIGIT(Ch) iswdigit (Ch) -# define ISXDIGIT(Ch) iswxdigit (Ch) -# define TOLOWER(Ch) towlower (Ch) -# define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF -# define __strtoll_internal __wcstoll_internal -# define __strtoull_internal __wcstoull_internal -# define __strtol_internal __wcstol_internal -# define __strtoul_internal __wcstoul_internal -# define __strtold_internal __wcstold_internal -# define __strtod_internal __wcstod_internal -# define __strtof_internal __wcstof_internal - -# define L_(Str) L##Str -# define CHAR_T wchar_t -# define UCHAR_T unsigned int -# define WINT_T wint_t -# undef EOF -# define EOF WEOF -#else -# define ungetc(c, s) ((void) ((int) c == EOF \ - || (--read_in, \ - _IO_sputbackc (s, (unsigned char) c)))) -# define ungetc_not_eof(c, s) ((void) (--read_in, \ - _IO_sputbackc (s, (unsigned char) c))) -# define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \ - : ((c = _IO_getc_unlocked (s)), \ - (void) (c != EOF \ - ? ++read_in \ - : (size_t) (inchar_errno = errno)), c)) -# define ISSPACE(Ch) __isspace_l (Ch, loc) -# define ISDIGIT(Ch) __isdigit_l (Ch, loc) -# define ISXDIGIT(Ch) __isxdigit_l (Ch, loc) -# define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc) -# define ORIENT if (_IO_vtable_offset (s) == 0 \ - && _IO_fwide (s, -1) != -1) \ - return EOF - -# define L_(Str) Str -# define CHAR_T char -# define UCHAR_T unsigned char -# define WINT_T int -#endif - -#include "printf-parse.h" /* Use read_int. */ - -#define encode_error() do { \ - errval = 4; \ - __set_errno (EILSEQ); \ - goto errout; \ - } while (0) -#define conv_error() do { \ - errval = 2; \ - goto errout; \ - } while (0) -#define input_error() do { \ - errval = 1; \ - if (done == 0) done = EOF; \ - goto errout; \ - } while (0) -#define add_ptr_to_free(ptr) \ - do \ - { \ - if (ptrs_to_free == NULL \ - || ptrs_to_free->count == (sizeof (ptrs_to_free->ptrs) \ - / sizeof (ptrs_to_free->ptrs[0]))) \ - { \ - struct ptrs_to_free *new_ptrs = alloca (sizeof (*ptrs_to_free)); \ - new_ptrs->count = 0; \ - new_ptrs->next = ptrs_to_free; \ - ptrs_to_free = new_ptrs; \ - } \ - ptrs_to_free->ptrs[ptrs_to_free->count++] = (ptr); \ - } \ - while (0) -#define ARGCHECK(s, format) \ - do \ - { \ - /* Check file argument for consistence. */ \ - CHECK_FILE (s, EOF); \ - if (s->_flags & _IO_NO_READS) \ - { \ - __set_errno (EBADF); \ - return EOF; \ - } \ - else if (format == NULL) \ - { \ - __set_errno (EINVAL); \ - return EOF; \ - } \ - } while (0) -#define LOCK_STREAM(S) \ - __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \ - _IO_flockfile (S) -#define UNLOCK_STREAM(S) \ - _IO_funlockfile (S); \ - __libc_cleanup_region_end (0) - -struct ptrs_to_free -{ - size_t count; - struct ptrs_to_free *next; - char **ptrs[32]; -}; - -struct char_buffer { - CHAR_T *current; - CHAR_T *end; - struct scratch_buffer scratch; -}; - -/* Returns a pointer to the first CHAR_T object in the buffer. Only - valid if char_buffer_add (BUFFER, CH) has been called and - char_buffer_error (BUFFER) is false. */ -static inline CHAR_T * -char_buffer_start (const struct char_buffer *buffer) -{ - return (CHAR_T *) buffer->scratch.data; -} - -/* Returns the number of CHAR_T objects in the buffer. Only valid if - char_buffer_error (BUFFER) is false. */ -static inline size_t -char_buffer_size (const struct char_buffer *buffer) -{ - return buffer->current - char_buffer_start (buffer); -} - -/* Reinitializes BUFFER->current and BUFFER->end to cover the entire - scratch buffer. */ -static inline void -char_buffer_rewind (struct char_buffer *buffer) -{ - buffer->current = char_buffer_start (buffer); - buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T); -} - -/* Returns true if a previous call to char_buffer_add (BUFFER, CH) - failed. */ -static inline bool -char_buffer_error (const struct char_buffer *buffer) -{ - return __glibc_unlikely (buffer->current == NULL); -} - -/* Slow path for char_buffer_add. */ -static void -char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch) -{ - if (char_buffer_error (buffer)) - return; - size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data; - if (!scratch_buffer_grow_preserve (&buffer->scratch)) - { - buffer->current = NULL; - buffer->end = NULL; - return; - } - char_buffer_rewind (buffer); - buffer->current += offset; - *buffer->current++ = ch; -} - -/* Adds CH to BUFFER. This function does not report any errors, check - for them with char_buffer_error. */ -static inline void -char_buffer_add (struct char_buffer *buffer, CHAR_T ch) - __attribute__ ((always_inline)); -static inline void -char_buffer_add (struct char_buffer *buffer, CHAR_T ch) -{ - if (__glibc_unlikely (buffer->current == buffer->end)) - char_buffer_add_slow (buffer, ch); - else - *buffer->current++ = ch; -} - -/* Read formatted input from S according to the format string - FORMAT, using the argument list in ARG. - Return the number of assignments made, or -1 for an input error. */ -#ifdef COMPILE_WSCANF -int -_IO_vfwscanf (FILE *s, const wchar_t *format, va_list argptr, - int *errp) -#else -int -_IO_vfscanf_internal (FILE *s, const char *format, va_list argptr, - int *errp) -#endif -{ - va_list arg; - const CHAR_T *f = format; - UCHAR_T fc; /* Current character of the format. */ - WINT_T done = 0; /* Assignments done. */ - size_t read_in = 0; /* Chars read in. */ - WINT_T c = 0; /* Last char read. */ - int width; /* Maximum field width. */ - int flags; /* Modifiers for current format element. */ - int errval = 0; -#ifndef COMPILE_WSCANF - locale_t loc = _NL_CURRENT_LOCALE; - struct __locale_data *const curctype = loc->__locales[LC_CTYPE]; -#endif - - /* Errno of last failed inchar call. */ - int inchar_errno = 0; - /* Status for reading F-P nums. */ - char got_digit, got_dot, got_e, got_sign; - /* If a [...] is a [^...]. */ - CHAR_T not_in; -#define exp_char not_in - /* Base for integral numbers. */ - int base; - /* Decimal point character. */ -#ifdef COMPILE_WSCANF - wint_t decimal; -#else - const char *decimal; -#endif - /* The thousands character of the current locale. */ -#ifdef COMPILE_WSCANF - wint_t thousands; -#else - const char *thousands; -#endif - struct ptrs_to_free *ptrs_to_free = NULL; - /* State for the conversions. */ - mbstate_t state; - /* Integral holding variables. */ - union - { - long long int q; - unsigned long long int uq; - long int l; - unsigned long int ul; - } num; - /* Character-buffer pointer. */ - char *str = NULL; - wchar_t *wstr = NULL; - char **strptr = NULL; - ssize_t strsize = 0; - /* We must not react on white spaces immediately because they can - possibly be matched even if in the input stream no character is - available anymore. */ - int skip_space = 0; - /* Workspace. */ - CHAR_T *tw; /* Temporary pointer. */ - struct char_buffer charbuf; - scratch_buffer_init (&charbuf.scratch); - -#ifdef __va_copy - __va_copy (arg, argptr); -#else - arg = (va_list) argptr; -#endif - -#ifdef ORIENT - ORIENT; -#endif - - ARGCHECK (s, format); - - { -#ifndef COMPILE_WSCANF - struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC]; -#endif - - /* Figure out the decimal point character. */ -#ifdef COMPILE_WSCANF - decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC); -#else - decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string; -#endif - /* Figure out the thousands separator character. */ -#ifdef COMPILE_WSCANF - thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC); -#else - thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string; - if (*thousands == '\0') - thousands = NULL; -#endif - } - - /* Lock the stream. */ - LOCK_STREAM (s); - - -#ifndef COMPILE_WSCANF - /* From now on we use `state' to convert the format string. */ - memset (&state, '\0', sizeof (state)); -#endif - - /* Run through the format string. */ - while (*f != '\0') - { - unsigned int argpos; - /* Extract the next argument, which is of type TYPE. - For a %N$... spec, this is the Nth argument from the beginning; - otherwise it is the next argument after the state now in ARG. */ -#ifdef __va_copy -# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \ - ({ unsigned int pos = argpos; \ - va_list arg; \ - __va_copy (arg, argptr); \ - while (--pos > 0) \ - (void) va_arg (arg, void *); \ - va_arg (arg, type); \ - })) -#else -# if 0 - /* XXX Possible optimization. */ -# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \ - ({ va_list arg = (va_list) argptr; \ - arg = (va_list) ((char *) arg \ - + (argpos - 1) \ - * __va_rounded_size (void *)); \ - va_arg (arg, type); \ - })) -# else -# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \ - ({ unsigned int pos = argpos; \ - va_list arg = (va_list) argptr; \ - while (--pos > 0) \ - (void) va_arg (arg, void *); \ - va_arg (arg, type); \ - })) -# endif -#endif - -#ifndef COMPILE_WSCANF - if (!isascii ((unsigned char) *f)) - { - /* Non-ASCII, may be a multibyte. */ - int len = __mbrlen (f, strlen (f), &state); - if (len > 0) - { - do - { - c = inchar (); - if (__glibc_unlikely (c == EOF)) - input_error (); - else if (c != (unsigned char) *f++) - { - ungetc_not_eof (c, s); - conv_error (); - } - } - while (--len > 0); - continue; - } - } -#endif - - fc = *f++; - if (fc != '%') - { - /* Remember to skip spaces. */ - if (ISSPACE (fc)) - { - skip_space = 1; - continue; - } - - /* Read a character. */ - c = inchar (); - - /* Characters other than format specs must just match. */ - if (__glibc_unlikely (c == EOF)) - input_error (); - - /* We saw white space char as the last character in the format - string. Now it's time to skip all leading white space. */ - if (skip_space) - { - while (ISSPACE (c)) - if (__glibc_unlikely (inchar () == EOF)) - input_error (); - skip_space = 0; - } - - if (__glibc_unlikely (c != fc)) - { - ungetc (c, s); - conv_error (); - } - - continue; - } - - /* This is the start of the conversion string. */ - flags = 0; - - /* Initialize state of modifiers. */ - argpos = 0; - - /* Prepare temporary buffer. */ - char_buffer_rewind (&charbuf); - - /* Check for a positional parameter specification. */ - if (ISDIGIT ((UCHAR_T) *f)) - { - argpos = read_int ((const UCHAR_T **) &f); - if (*f == L_('$')) - ++f; - else - { - /* Oops; that was actually the field width. */ - width = argpos; - argpos = 0; - goto got_width; - } - } - - /* Check for the assignment-suppressing, the number grouping flag, - and the signal to use the locale's digit representation. */ - while (*f == L_('*') || *f == L_('\'') || *f == L_('I')) - switch (*f++) - { - case L_('*'): - flags |= SUPPRESS; - break; - case L_('\''): -#ifdef COMPILE_WSCANF - if (thousands != L'\0') -#else - if (thousands != NULL) -#endif - flags |= GROUP; - break; - case L_('I'): - flags |= I18N; - break; - } - - /* Find the maximum field width. */ - width = 0; - if (ISDIGIT ((UCHAR_T) *f)) - width = read_int ((const UCHAR_T **) &f); - got_width: - if (width == 0) - width = -1; - - /* Check for type modifiers. */ - switch (*f++) - { - case L_('h'): - /* ints are short ints or chars. */ - if (*f == L_('h')) - { - ++f; - flags |= CHAR; - } - else - flags |= SHORT; - break; - case L_('l'): - if (*f == L_('l')) - { - /* A double `l' is equivalent to an `L'. */ - ++f; - flags |= LONGDBL | LONG; - } - else - /* ints are long ints. */ - flags |= LONG; - break; - case L_('q'): - case L_('L'): - /* doubles are long doubles, and ints are long long ints. */ - flags |= LONGDBL | LONG; - break; - case L_('a'): - /* The `a' is used as a flag only if followed by `s', `S' or - `['. */ - if (*f != L_('s') && *f != L_('S') && *f != L_('[')) - { - --f; - break; - } - /* In __isoc99_*scanf %as, %aS and %a[ extension is not - supported at all. */ - if (s->_flags2 & _IO_FLAGS2_SCANF_STD) - { - --f; - break; - } - /* String conversions (%s, %[) take a `char **' - arg and fill it in with a malloc'd pointer. */ - flags |= GNU_MALLOC; - break; - case L_('m'): - flags |= POSIX_MALLOC; - if (*f == L_('l')) - { - ++f; - flags |= LONG; - } - break; - case L_('z'): - if (need_longlong && sizeof (size_t) > sizeof (unsigned long int)) - flags |= LONGDBL; - else if (sizeof (size_t) > sizeof (unsigned int)) - flags |= LONG; - break; - case L_('j'): - if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int)) - flags |= LONGDBL; - else if (sizeof (uintmax_t) > sizeof (unsigned int)) - flags |= LONG; - break; - case L_('t'): - if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int)) - flags |= LONGDBL; - else if (sizeof (ptrdiff_t) > sizeof (int)) - flags |= LONG; - break; - default: - /* Not a recognized modifier. Backup. */ - --f; - break; - } - - /* End of the format string? */ - if (__glibc_unlikely (*f == L_('\0'))) - conv_error (); - - /* Find the conversion specifier. */ - fc = *f++; - if (skip_space || (fc != L_('[') && fc != L_('c') - && fc != L_('C') && fc != L_('n'))) - { - /* Eat whitespace. */ - int save_errno = errno; - __set_errno (0); - do - /* We add the additional test for EOF here since otherwise - inchar will restore the old errno value which might be - EINTR but does not indicate an interrupt since nothing - was read at this time. */ - if (__builtin_expect ((c == EOF || inchar () == EOF) - && errno == EINTR, 0)) - input_error (); - while (ISSPACE (c)); - __set_errno (save_errno); - ungetc (c, s); - skip_space = 0; - } - - switch (fc) - { - case L_('%'): /* Must match a literal '%'. */ - c = inchar (); - if (__glibc_unlikely (c == EOF)) - input_error (); - if (__glibc_unlikely (c != fc)) - { - ungetc_not_eof (c, s); - conv_error (); - } - break; - - case L_('n'): /* Answer number of assignments done. */ - /* Corrigendum 1 to ISO C 1990 describes the allowed flags - with the 'n' conversion specifier. */ - if (!(flags & SUPPRESS)) - { - /* Don't count the read-ahead. */ - if (need_longlong && (flags & LONGDBL)) - *ARG (long long int *) = read_in; - else if (need_long && (flags & LONG)) - *ARG (long int *) = read_in; - else if (flags & SHORT) - *ARG (short int *) = read_in; - else if (!(flags & CHAR)) - *ARG (int *) = read_in; - else - *ARG (char *) = read_in; - -#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1 - /* We have a severe problem here. The ISO C standard - contradicts itself in explaining the effect of the %n - format in `scanf'. While in ISO C:1990 and the ISO C - Amendement 1:1995 the result is described as - - Execution of a %n directive does not effect the - assignment count returned at the completion of - execution of the f(w)scanf function. - - in ISO C Corrigendum 1:1994 the following was added: - - Subclause 7.9.6.2 - Add the following fourth example: - In: - #include <stdio.h> - int d1, d2, n1, n2, i; - i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2); - the value 123 is assigned to d1 and the value3 to n1. - Because %n can never get an input failure the value - of 3 is also assigned to n2. The value of d2 is not - affected. The value 3 is assigned to i. - - We go for now with the historically correct code from ISO C, - i.e., we don't count the %n assignments. When it ever - should proof to be wrong just remove the #ifdef above. */ - ++done; -#endif - } - break; - - case L_('c'): /* Match characters. */ - if ((flags & LONG) == 0) - { - if (width == -1) - width = 1; - -#define STRING_ARG(Str, Type, Width) \ - do if (!(flags & SUPPRESS)) \ - { \ - if (flags & MALLOC) \ - { \ - /* The string is to be stored in a malloc'd buffer. */ \ - /* For %mS using char ** is actually wrong, but \ - shouldn't make a difference on any arch glibc \ - supports and would unnecessarily complicate \ - things. */ \ - strptr = ARG (char **); \ - if (strptr == NULL) \ - conv_error (); \ - /* Allocate an initial buffer. */ \ - strsize = Width; \ - *strptr = (char *) malloc (strsize * sizeof (Type)); \ - Str = (Type *) *strptr; \ - if (Str != NULL) \ - add_ptr_to_free (strptr); \ - else if (flags & POSIX_MALLOC) \ - { \ - done = EOF; \ - goto errout; \ - } \ - } \ - else \ - Str = ARG (Type *); \ - if (Str == NULL) \ - conv_error (); \ - } while (0) -#ifdef COMPILE_WSCANF - STRING_ARG (str, char, 100); -#else - STRING_ARG (str, char, (width > 1024 ? 1024 : width)); -#endif - - c = inchar (); - if (__glibc_unlikely (c == EOF)) - input_error (); - -#ifdef COMPILE_WSCANF - /* We have to convert the wide character(s) into multibyte - characters and store the result. */ - memset (&state, '\0', sizeof (state)); - - do - { - size_t n; - - if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC) - && *strptr + strsize - str <= MB_LEN_MAX) - { - /* We have to enlarge the buffer if the `m' flag - was given. */ - size_t strleng = str - *strptr; - char *newstr; - - newstr = (char *) realloc (*strptr, strsize * 2); - if (newstr == NULL) - { - /* Can't allocate that much. Last-ditch effort. */ - newstr = (char *) realloc (*strptr, - strleng + MB_LEN_MAX); - if (newstr == NULL) - { - /* c can't have `a' flag, only `m'. */ - done = EOF; - goto errout; - } - else - { - *strptr = newstr; - str = newstr + strleng; - strsize = strleng + MB_LEN_MAX; - } - } - else - { - *strptr = newstr; - str = newstr + strleng; - strsize *= 2; - } - } - - n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state); - if (__glibc_unlikely (n == (size_t) -1)) - /* No valid wide character. */ - input_error (); - - /* Increment the output pointer. Even if we don't - write anything. */ - str += n; - } - while (--width > 0 && inchar () != EOF); -#else - |
