about | summary | refs | log | tree | commit | diff
path: root/stdio-common/vfscanf.c
diff options
context:
space:
mode:
Diffstat (limited to 'stdio-common/vfscanf.c')
-rw-r--r-- stdio-common/vfscanf.c 3042
1 files changed, 1 insertions, 3041 deletions
diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c
index 1ce836a324..5eedca8340 100644
--- a/stdio-common/vfscanf.c
+++ b/stdio-common/vfscanf.c
@@ -15,3053 +15,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <assert.h>
-#include <errno.h>
-#include <limits.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
-#include <wctype.h>
-#include <libc-diag.h>
-#include <libc-lock.h>
-#include <locale/localeinfo.h>
-#include <scratch_buffer.h>
-
-#ifdef __GNUC__ /* With GNU C, LONGLONG is `long long'; otherwise it falls back to `long'. */
-# define HAVE_LONGLONG
-# define LONGLONG long long
-#else
-# define LONGLONG long
-#endif
-
-/* Determine whether we have to handle `long long' at all.
-   need_longlong is 0 when `long' and `long long' have the same
-   maximum value, so code guarded by it can be dropped by the
-   compiler on such targets. */
-#if LONG_MAX == LONG_LONG_MAX
-# define need_longlong 0
-#else
-# define need_longlong 1
-#endif
-
-/* Determine whether we have to handle `long'.
-   need_long is 0 when `int' and `long' have the same maximum value. */
-#if INT_MAX == LONG_MAX
-# define need_long 0
-#else
-# define need_long 1
-#endif
-
-/* Those are flags in the conversion format.
-   Each one is a distinct bit; the parser ORs them into the local
-   `flags' variable while it reads the modifiers of one conversion
-   specification (for example `flags |= SUPPRESS' for `*'). */
-#define LONG 0x0001 /* l: long or double */
-#define LONGDBL 0x0002 /* L: long long or long double */
-#define SHORT 0x0004 /* h: short */
-#define SUPPRESS 0x0008 /* *: suppress assignment */
-#define POINTER 0x0010 /* weird %p pointer (`fake hex') */
-#define NOSKIP 0x0020 /* do not skip blanks */
-#define NUMBER_SIGNED 0x0040 /* signed integer */
-#define GROUP 0x0080 /* ': group numbers */
-#define GNU_MALLOC 0x0100 /* a: malloc strings */
-#define CHAR 0x0200 /* hh: char */
-#define I18N 0x0400 /* I: use locale's digits */
-#define HEXA_FLOAT 0x0800 /* hexadecimal float */
-#define READ_POINTER 0x1000 /* this is a pointer value */
-#define POSIX_MALLOC 0x2000 /* m: malloc strings */
-#define MALLOC (GNU_MALLOC | POSIX_MALLOC) /* mask: either malloc flavour (`a' or `m') was requested */
-
-#include <locale/localeinfo.h>
#include <libioP.h>
-#ifdef COMPILE_WSCANF /* Wide build: the same function body is compiled with wchar_t input primitives. */
-# define ungetc(c, s) ((void) (c == WEOF \
- || (--read_in, \
- _IO_sputbackwc (s, c)))) /* no-op when c is WEOF; otherwise uncount the character and push it back */
-# define ungetc_not_eof(c, s) ((void) (--read_in, \
- _IO_sputbackwc (s, c))) /* same as ungetc but without the WEOF test */
-# define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
- : ((c = _IO_getwc_unlocked (s)), \
- (void) (c != WEOF \
- ? ++read_in \
- : (size_t) (inchar_errno = errno)), c)) /* once c is WEOF nothing more is read and the errno saved from the failing read is restored; a successful read bumps read_in */
-
-# define ISSPACE(Ch) iswspace (Ch)
-# define ISDIGIT(Ch) iswdigit (Ch)
-# define ISXDIGIT(Ch) iswxdigit (Ch)
-# define TOLOWER(Ch) towlower (Ch)
-# define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF /* give up unless _IO_fwide reports wide orientation for S */
-# define __strtoll_internal __wcstoll_internal /* the wide build redirects the narrow number-parser names to their wcsto* counterparts */
-# define __strtoull_internal __wcstoull_internal
-# define __strtol_internal __wcstol_internal
-# define __strtoul_internal __wcstoul_internal
-# define __strtold_internal __wcstold_internal
-# define __strtod_internal __wcstod_internal
-# define __strtof_internal __wcstof_internal
-
-# define L_(Str) L##Str
-# define CHAR_T wchar_t
-# define UCHAR_T unsigned int
-# define WINT_T wint_t
-# undef EOF
-# define EOF WEOF
-#else /* Narrow (char) build. */
-# define ungetc(c, s) ((void) ((int) c == EOF \
- || (--read_in, \
- _IO_sputbackc (s, (unsigned char) c)))) /* no-op when c is EOF; otherwise uncount the character and push it back */
-# define ungetc_not_eof(c, s) ((void) (--read_in, \
- _IO_sputbackc (s, (unsigned char) c))) /* same as ungetc but without the EOF test */
-# define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
- : ((c = _IO_getc_unlocked (s)), \
- (void) (c != EOF \
- ? ++read_in \
- : (size_t) (inchar_errno = errno)), c)) /* once c is EOF nothing more is read and the errno saved from the failing read is restored; a successful read bumps read_in */
-# define ISSPACE(Ch) __isspace_l (Ch, loc)
-# define ISDIGIT(Ch) __isdigit_l (Ch, loc)
-# define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
-# define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
-# define ORIENT if (_IO_vtable_offset (s) == 0 \
- && _IO_fwide (s, -1) != -1) \
- return EOF /* only checked when _IO_vtable_offset (s) is 0; give up unless _IO_fwide reports byte orientation */
-
-# define L_(Str) Str
-# define CHAR_T char
-# define UCHAR_T unsigned char
-# define WINT_T int
-#endif
-
-#include "printf-parse.h" /* Use read_int. */
-
-#define encode_error() do { \
- errval = 4; \
- __set_errno (EILSEQ); \
- goto errout; \
- } while (0) /* errval 4: encoding error; sets errno to EILSEQ and jumps to the errout label */
-#define conv_error() do { \
- errval = 2; \
- goto errout; \
- } while (0) /* errval 2: conversion failure; jumps to errout without touching `done' */
-#define input_error() do { \
- errval = 1; \
- if (done == 0) done = EOF; \
- goto errout; \
- } while (0) /* errval 1: input failure; the result becomes EOF only if nothing has been assigned yet */
-#define add_ptr_to_free(ptr) \
- do \
- { \
- if (ptrs_to_free == NULL \
- || ptrs_to_free->count == (sizeof (ptrs_to_free->ptrs) \
- / sizeof (ptrs_to_free->ptrs[0]))) \
- { \
- struct ptrs_to_free *new_ptrs = alloca (sizeof (*ptrs_to_free)); \
- new_ptrs->count = 0; \
- new_ptrs->next = ptrs_to_free; \
- ptrs_to_free = new_ptrs; \
- } \
- ptrs_to_free->ptrs[ptrs_to_free->count++] = (ptr); \
- } \
- while (0) /* records PTR in the head chunk; a new chunk is obtained with alloca and linked in front when there is no chunk yet or the head chunk is full */
-#define ARGCHECK(s, format) \
- do \
- { \
- /* Check file argument for consistency. */ \
- CHECK_FILE (s, EOF); \
- if (s->_flags & _IO_NO_READS) \
- { \
- __set_errno (EBADF); \
- return EOF; \
- } \
- else if (format == NULL) \
- { \
- __set_errno (EINVAL); \
- return EOF; \
- } \
- } while (0) /* returns EOF from the enclosing function: EBADF if _IO_NO_READS is set on S, EINVAL if FORMAT is NULL */
-#define LOCK_STREAM(S) \
- __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
- _IO_flockfile (S) /* expands to two statements (no do/while wrapper); registers _IO_funlockfile as the cleanup handler, then locks S */
-#define UNLOCK_STREAM(S) \
- _IO_funlockfile (S); \
- __libc_cleanup_region_end (0) /* counterpart of LOCK_STREAM: unlocks S, then ends the cleanup region */
-
-struct ptrs_to_free /* One chunk of the linked list filled by add_ptr_to_free; presumably walked on the error path to release malloc'd strings (that code is outside this excerpt). */
-{
- size_t count; /* Number of entries of PTRS currently in use. */
- struct ptrs_to_free *next; /* Chunk that was the list head before this one, or NULL. */
- char **ptrs[32]; /* Recorded `char **' destinations (STRING_ARG stores its strptr here). */
-};
-
-struct char_buffer { /* Append-only CHAR_T workspace whose storage is a scratch_buffer. */
- CHAR_T *current; /* Next position to write; NULL once growing the buffer has failed. */
- CHAR_T *end; /* One past the last usable element; NULL once growing the buffer has failed. */
- struct scratch_buffer scratch; /* Backing storage; scratch.data is the start of the buffer. */
-};
-
-/* Returns a pointer to the first CHAR_T object in the buffer. Only
- valid if char_buffer_add (BUFFER, CH) has been called and
- char_buffer_error (BUFFER) is false.
- The pointer is the scratch buffer's data pointer viewed as CHAR_T *;
- BUFFER itself is not modified. */
-static inline CHAR_T *
-char_buffer_start (const struct char_buffer *buffer)
-{
- return (CHAR_T *) buffer->scratch.data;
-}
-
-/* Returns the number of CHAR_T objects in the buffer. Only valid if
- char_buffer_error (BUFFER) is false.
- The count is the distance from char_buffer_start (BUFFER) to
- BUFFER->current, i.e. it is measured in elements, not bytes. */
-static inline size_t
-char_buffer_size (const struct char_buffer *buffer)
-{
- return buffer->current - char_buffer_start (buffer);
-}
-
-/* Reinitializes BUFFER->current and BUFFER->end to cover the entire
- scratch buffer.
- After the call the buffer is logically empty: current points at the
- start of the scratch data and end lies scratch.length / sizeof (CHAR_T)
- elements further on (NOTE(review): this assumes scratch.length is a
- byte count -- confirm against scratch_buffer.h). */
-static inline void
-char_buffer_rewind (struct char_buffer *buffer)
-{
- buffer->current = char_buffer_start (buffer);
- buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T);
-}
-
-/* Returns true if a previous call to char_buffer_add (BUFFER, CH)
- failed.
- Failure is encoded as BUFFER->current == NULL, the state that
- char_buffer_add_slow leaves behind when the scratch buffer cannot be
- grown. The test is marked as unlikely for the optimizer. */
-static inline bool
-char_buffer_error (const struct char_buffer *buffer)
-{
- return __glibc_unlikely (buffer->current == NULL);
-}
-
-/* Slow path for char_buffer_add.
- Grows the scratch buffer, preserving its contents, and then stores CH.
- If BUFFER is already in the error state nothing is done. If growing
- fails, current and end are both set to NULL so that
- char_buffer_error (BUFFER) reports the failure from then on; CH is
- dropped in that case. */
-static void
-char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch)
-{
- if (char_buffer_error (buffer))
- return;
- size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data; /* measured from END, not CURRENT: char_buffer_add only calls this when current == end, so it is the number of elements stored so far */
- if (!scratch_buffer_grow_preserve (&buffer->scratch))
- {
- buffer->current = NULL; /* enter the sticky error state */
- buffer->end = NULL;
- return;
- }
- char_buffer_rewind (buffer); /* scratch.data may have changed; recompute current/end from it */
- buffer->current += offset; /* skip over the preserved contents */
- *buffer->current++ = ch;
-}
-
-/* Adds CH to BUFFER. This function does not report any errors, check
- for them with char_buffer_error.
- The common case is a single store; only when current has reached end
- is char_buffer_add_slow called. That also covers the error state,
- where current and end are both NULL and the slow path returns
- without storing anything. */
-static inline void
-char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
- __attribute__ ((always_inline));
-static inline void
-char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
-{
- if (__glibc_unlikely (buffer->current == buffer->end))
- char_buffer_add_slow (buffer, ch);
- else
- *buffer->current++ = ch;
-}
-
-/* Read formatted input from S according to the format string
- FORMAT, using the argument list in ARG.
- Return the number of assignments made, or -1 for an input error. */
-#ifdef COMPILE_WSCANF
-int
-_IO_vfwscanf (FILE *s, const wchar_t *format, va_list argptr,
- int *errp)
-#else
-int
-_IO_vfscanf_internal (FILE *s, const char *format, va_list argptr,
- int *errp)
-#endif
-{
- va_list arg;
- const CHAR_T *f = format;
- UCHAR_T fc; /* Current character of the format. */
- WINT_T done = 0; /* Assignments done. */
- size_t read_in = 0; /* Chars read in. */
- WINT_T c = 0; /* Last char read. */
- int width; /* Maximum field width. */
- int flags; /* Modifiers for current format element. */
- int errval = 0;
-#ifndef COMPILE_WSCANF
- locale_t loc = _NL_CURRENT_LOCALE;
- struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
-#endif
-
- /* Errno of last failed inchar call. */
- int inchar_errno = 0;
- /* Status for reading F-P nums. */
- char got_digit, got_dot, got_e, got_sign;
- /* If a [...] is a [^...]. */
- CHAR_T not_in;
-#define exp_char not_in
- /* Base for integral numbers. */
- int base;
- /* Decimal point character. */
-#ifdef COMPILE_WSCANF
- wint_t decimal;
-#else
- const char *decimal;
-#endif
- /* The thousands character of the current locale. */
-#ifdef COMPILE_WSCANF
- wint_t thousands;
-#else
- const char *thousands;
-#endif
- struct ptrs_to_free *ptrs_to_free = NULL;
- /* State for the conversions. */
- mbstate_t state;
- /* Integral holding variables. */
- union
- {
- long long int q;
- unsigned long long int uq;
- long int l;
- unsigned long int ul;
- } num;
- /* Character-buffer pointer. */
- char *str = NULL;
- wchar_t *wstr = NULL;
- char **strptr = NULL;
- ssize_t strsize = 0;
- /* We must not react on white spaces immediately because they can
- possibly be matched even if in the input stream no character is
- available anymore. */
- int skip_space = 0;
- /* Workspace. */
- CHAR_T *tw; /* Temporary pointer. */
- struct char_buffer charbuf;
- scratch_buffer_init (&charbuf.scratch);
-
-#ifdef __va_copy
- __va_copy (arg, argptr);
-#else
- arg = (va_list) argptr;
-#endif
-
-#ifdef ORIENT
- ORIENT;
-#endif
-
- ARGCHECK (s, format);
-
- {
-#ifndef COMPILE_WSCANF
- struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
-#endif
-
- /* Figure out the decimal point character. */
-#ifdef COMPILE_WSCANF
- decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
-#else
- decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
-#endif
- /* Figure out the thousands separator character. */
-#ifdef COMPILE_WSCANF
- thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
-#else
- thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
- if (*thousands == '\0')
- thousands = NULL;
-#endif
- }
-
- /* Lock the stream. */
- LOCK_STREAM (s);
-
-
-#ifndef COMPILE_WSCANF
- /* From now on we use `state' to convert the format string. */
- memset (&state, '\0', sizeof (state));
-#endif
-
- /* Run through the format string. */
- while (*f != '\0')
- {
- unsigned int argpos;
- /* Extract the next argument, which is of type TYPE.
- For a %N$... spec, this is the Nth argument from the beginning;
- otherwise it is the next argument after the state now in ARG. */
-#ifdef __va_copy
-# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
- ({ unsigned int pos = argpos; \
- va_list arg; \
- __va_copy (arg, argptr); \
- while (--pos > 0) \
- (void) va_arg (arg, void *); \
- va_arg (arg, type); \
- }))
-#else
-# if 0
- /* XXX Possible optimization. */
-# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
- ({ va_list arg = (va_list) argptr; \
- arg = (va_list) ((char *) arg \
- + (argpos - 1) \
- * __va_rounded_size (void *)); \
- va_arg (arg, type); \
- }))
-# else
-# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
- ({ unsigned int pos = argpos; \
- va_list arg = (va_list) argptr; \
- while (--pos > 0) \
- (void) va_arg (arg, void *); \
- va_arg (arg, type); \
- }))
-# endif
-#endif
-
-#ifndef COMPILE_WSCANF
- if (!isascii ((unsigned char) *f))
- {
- /* Non-ASCII, may be a multibyte. */
- int len = __mbrlen (f, strlen (f), &state);
- if (len > 0)
- {
- do
- {
- c = inchar ();
- if (__glibc_unlikely (c == EOF))
- input_error ();
- else if (c != (unsigned char) *f++)
- {
- ungetc_not_eof (c, s);
- conv_error ();
- }
- }
- while (--len > 0);
- continue;
- }
- }
-#endif
-
- fc = *f++;
- if (fc != '%')
- {
- /* Remember to skip spaces. */
- if (ISSPACE (fc))
- {
- skip_space = 1;
- continue;
- }
-
- /* Read a character. */
- c = inchar ();
-
- /* Characters other than format specs must just match. */
- if (__glibc_unlikely (c == EOF))
- input_error ();
-
- /* We saw white space char as the last character in the format
- string. Now it's time to skip all leading white space. */
- if (skip_space)
- {
- while (ISSPACE (c))
- if (__glibc_unlikely (inchar () == EOF))
- input_error ();
- skip_space = 0;
- }
-
- if (__glibc_unlikely (c != fc))
- {
- ungetc (c, s);
- conv_error ();
- }
-
- continue;
- }
-
- /* This is the start of the conversion string. */
- flags = 0;
-
- /* Initialize state of modifiers. */
- argpos = 0;
-
- /* Prepare temporary buffer. */
- char_buffer_rewind (&charbuf);
-
- /* Check for a positional parameter specification. */
- if (ISDIGIT ((UCHAR_T) *f))
- {
- argpos = read_int ((const UCHAR_T **) &f);
- if (*f == L_('$'))
- ++f;
- else
- {
- /* Oops; that was actually the field width. */
- width = argpos;
- argpos = 0;
- goto got_width;
- }
- }
-
- /* Check for the assignment-suppressing, the number grouping flag,
- and the signal to use the locale's digit representation. */
- while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
- switch (*f++)
- {
- case L_('*'):
- flags |= SUPPRESS;
- break;
- case L_('\''):
-#ifdef COMPILE_WSCANF
- if (thousands != L'\0')
-#else
- if (thousands != NULL)
-#endif
- flags |= GROUP;
- break;
- case L_('I'):
- flags |= I18N;
- break;
- }
-
- /* Find the maximum field width. */
- width = 0;
- if (ISDIGIT ((UCHAR_T) *f))
- width = read_int ((const UCHAR_T **) &f);
- got_width:
- if (width == 0)
- width = -1;
-
- /* Check for type modifiers. */
- switch (*f++)
- {
- case L_('h'):
- /* ints are short ints or chars. */
- if (*f == L_('h'))
- {
- ++f;
- flags |= CHAR;
- }
- else
- flags |= SHORT;
- break;
- case L_('l'):
- if (*f == L_('l'))
- {
- /* A double `l' is equivalent to an `L'. */
- ++f;
- flags |= LONGDBL | LONG;
- }
- else
- /* ints are long ints. */
- flags |= LONG;
- break;
- case L_('q'):
- case L_('L'):
- /* doubles are long doubles, and ints are long long ints. */
- flags |= LONGDBL | LONG;
- break;
- case L_('a'):
- /* The `a' is used as a flag only if followed by `s', `S' or
- `['. */
- if (*f != L_('s') && *f != L_('S') && *f != L_('['))
- {
- --f;
- break;
- }
- /* In __isoc99_*scanf %as, %aS and %a[ extension is not
- supported at all. */
- if (s->_flags2 & _IO_FLAGS2_SCANF_STD)
- {
- --f;
- break;
- }
- /* String conversions (%s, %[) take a `char **'
- arg and fill it in with a malloc'd pointer. */
- flags |= GNU_MALLOC;
- break;
- case L_('m'):
- flags |= POSIX_MALLOC;
- if (*f == L_('l'))
- {
- ++f;
- flags |= LONG;
- }
- break;
- case L_('z'):
- if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
- flags |= LONGDBL;
- else if (sizeof (size_t) > sizeof (unsigned int))
- flags |= LONG;
- break;
- case L_('j'):
- if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
- flags |= LONGDBL;
- else if (sizeof (uintmax_t) > sizeof (unsigned int))
- flags |= LONG;
- break;
- case L_('t'):
- if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
- flags |= LONGDBL;
- else if (sizeof (ptrdiff_t) > sizeof (int))
- flags |= LONG;
- break;
- default:
- /* Not a recognized modifier. Backup. */
- --f;
- break;
- }
-
- /* End of the format string? */
- if (__glibc_unlikely (*f == L_('\0')))
- conv_error ();
-
- /* Find the conversion specifier. */
- fc = *f++;
- if (skip_space || (fc != L_('[') && fc != L_('c')
- && fc != L_('C') && fc != L_('n')))
- {
- /* Eat whitespace. */
- int save_errno = errno;
- __set_errno (0);
- do
- /* We add the additional test for EOF here since otherwise
- inchar will restore the old errno value which might be
- EINTR but does not indicate an interrupt since nothing
- was read at this time. */
- if (__builtin_expect ((c == EOF || inchar () == EOF)
- && errno == EINTR, 0))
- input_error ();
- while (ISSPACE (c));
- __set_errno (save_errno);
- ungetc (c, s);
- skip_space = 0;
- }
-
- switch (fc)
- {
- case L_('%'): /* Must match a literal '%'. */
- c = inchar ();
- if (__glibc_unlikely (c == EOF))
- input_error ();
- if (__glibc_unlikely (c != fc))
- {
- ungetc_not_eof (c, s);
- conv_error ();
- }
- break;
-
- case L_('n'): /* Answer number of assignments done. */
- /* Corrigendum 1 to ISO C 1990 describes the allowed flags
- with the 'n' conversion specifier. */
- if (!(flags & SUPPRESS))
- {
- /* Don't count the read-ahead. */
- if (need_longlong && (flags & LONGDBL))
- *ARG (long long int *) = read_in;
- else if (need_long && (flags & LONG))
- *ARG (long int *) = read_in;
- else if (flags & SHORT)
- *ARG (short int *) = read_in;
- else if (!(flags & CHAR))
- *ARG (int *) = read_in;
- else
- *ARG (char *) = read_in;
-
-#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
- /* We have a severe problem here. The ISO C standard
- contradicts itself in explaining the effect of the %n
- format in `scanf'. While in ISO C:1990 and the ISO C
- Amendment 1:1995 the result is described as
-
- Execution of a %n directive does not affect the
- assignment count returned at the completion of
- execution of the f(w)scanf function.
-
- in ISO C Corrigendum 1:1994 the following was added:
-
- Subclause 7.9.6.2
- Add the following fourth example:
- In:
- #include <stdio.h>
- int d1, d2, n1, n2, i;
- i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
- the value 123 is assigned to d1 and the value 3 to n1.
- Because %n can never get an input failure the value
- of 3 is also assigned to n2. The value of d2 is not
- affected. The value 3 is assigned to i.
-
- We go for now with the historically correct code from ISO C,
- i.e., we don't count the %n assignments. When it ever
- should prove to be wrong, just remove the #ifdef above. */
- ++done;
-#endif
- }
- break;
-
- case L_('c'): /* Match characters. */
- if ((flags & LONG) == 0)
- {
- if (width == -1)
- width = 1;
-
-#define STRING_ARG(Str, Type, Width) \
- do if (!(flags & SUPPRESS)) \
- { \
- if (flags & MALLOC) \
- { \
- /* The string is to be stored in a malloc'd buffer. */ \
- /* For %mS using char ** is actually wrong, but \
- shouldn't make a difference on any arch glibc \
- supports and would unnecessarily complicate \
- things. */ \
- strptr = ARG (char **); \
- if (strptr == NULL) \
- conv_error (); \
- /* Allocate an initial buffer. */ \
- strsize = Width; \
- *strptr = (char *) malloc (strsize * sizeof (Type)); \
- Str = (Type *) *strptr; \
- if (Str != NULL) \
- add_ptr_to_free (strptr); \
- else if (flags & POSIX_MALLOC) \
- { \
- done = EOF; \
- goto errout; \
- } \
- } \
- else \
- Str = ARG (Type *); \
- if (Str == NULL) \
- conv_error (); \
- } while (0)
-#ifdef COMPILE_WSCANF
- STRING_ARG (str, char, 100);
-#else
- STRING_ARG (str, char, (width > 1024 ? 1024 : width));
-#endif
-
- c = inchar ();
- if (__glibc_unlikely (c == EOF))
- input_error ();
-
-#ifdef COMPILE_WSCANF
- /* We have to convert the wide character(s) into multibyte
- characters and store the result. */
- memset (&state, '\0', sizeof (state));
-
- do
- {
- size_t n;
-
- if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC)
- && *strptr + strsize - str <= MB_LEN_MAX)
- {
- /* We have to enlarge the buffer if the `m' flag
- was given. */
- size_t strleng = str - *strptr;
- char *newstr;
-
- newstr = (char *) realloc (*strptr, strsize * 2);
- if (newstr == NULL)
- {
- /* Can't allocate that much. Last-ditch effort. */
- newstr = (char *) realloc (*strptr,
- strleng + MB_LEN_MAX);
- if (newstr == NULL)
- {
- /* c can't have `a' flag, only `m'. */
- done = EOF;
- goto errout;
- }
- else
- {
- *strptr = newstr;
- str = newstr + strleng;
- strsize = strleng + MB_LEN_MAX;
- }
- }
- else
- {
- *strptr = newstr;
- str = newstr + strleng;
- strsize *= 2;
- }
- }
-
- n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
- if (__glibc_unlikely (n == (size_t) -1))
- /* No valid wide character. */
- input_error ();
-
- /* Increment the output pointer. Even if we don't
- write anything. */
- str += n;
- }
- while (--width > 0 && inchar () != EOF);
-#else
-