diff options
37 files changed, 3715 insertions, 242 deletions
@@ -1,3 +1,79 @@ +1999-01-11 Ulrich Drepper <drepper@cygnus.com> + + * ctype/Versions [GLIBC_2.0]: Export __ctype32_b. + * include/wctype.h: Declare __iswctype. + * stdio-common/vfscanf.c (__vfscanf): Use __iswspace instead of + iswspace. + * wctype/Makefile (routines): Add wcextra_l. + * wctype/wcextra.c (iswblank): Implement function here and don't use + __iswctype. + (__iswblank_l): Move definition to... + * wctype/wcextra_l.c: ...here. New file. + * wctype/wcfuncs.c: Really implement functions and don't call + __iswctype or __towctrans. + * wctype/wctype.h: Change isw* and tow* macros. Don't call + __iswctype or __towctrans. Instead optimize constant argument case. + + * iconv/gconv.h: Fix typos. + + * iconv/skeleton.c: Fix typos. Optimize init function a bit. + Correctly emit escape sequence to return to initial state in + conversion function. + + * iconvdata/iso-2022-jp.c (gconv_init): Correctly initialize + max_needed_to element. + + * manual/mbyte.texi: Removed. This is now described in charset.texi. + * manual/charset.texi: New file. + * manual/Makefile (chapters): Replace mbyte by charset. + * manual/ctype.texi: Document wide character functions. + * manual/intro.texi: Fix reference to mbyte chapter. + * manual/lang.texi: Likewise. + * manual/locale.texi: Likewise. + * manual/stdio.texi: Likewise. + * manual/string.texi: Fix @node line for new charset chapter. + * manual/libc.texinfo (UPDATED): Updated. Also update copyright years. + * manual/memory.texi (savestring): Optimize code to give a good + example. + + * manual/filesys.texi: Fix wording. Patches by Jim Meyering. + + * nscd/nscd_getgr_r.c: Include stdint.h to get uintptr_t definition. + * nscd/nscd_getpw_r.c: Likewise. + * nscd/nscd_gethst_r.c: Likewise. + + * stdlib/stdtold_l.c: Always include xlocale.h. + +1999-01-11 Geoffrey Keating <geoffk@ozemail.com.au> + + * stdlib/fpioconst.h (LDBL_MAX_10_EXP_LOG): Define to be same as + DBL_MAX_10_EXP_LOG if there is no long double. 
+ (_fpioconst_pow10): Always use size as LDBL_MAX_10_EXP_LOG to match + printf_fp.c. + +1999-01-10 Andreas Jaeger <aj@arthur.rhein-neckar.de> + + * timezone/Makefile ($(testdata)/GB): Changed to ... + ($(testdata)/Europe/London): ... for tst-timezone test. + ($(objpfx)tst-timezone.out): Change GB to Europe/London. + + * timezone/tst-timezone.c (main): Enable DST switching test, + change GB to Europe/London. + +1999-01-10 Philip Blundell <philb@gnu.org> + + * socket/Makefile (headers): Remove bits/sockunion.h. + +1999-01-09 Philip Blundell <philb@gnu.org> + + * socket/sys/socket.h: Don't include <bits/sockunion.h>. + * sysdeps/generic/bits/sockunion.h: Deleted. + * sysdeps/unix/sysv/linux/bits/sockunion.h: Likewise. + +1999-01-08 H.J. Lu <hjl@gnu.org> + + * io/fts.c (fts_close): Don't access memory after having it freed. + 1998-01-08 Andreas Schwab <schwab@issan.cs.uni-dortmund.de> * manual/Makefile (stamp-summary): Remove space after -t option diff --git a/ctype/Versions b/ctype/Versions index 56647bd784..6110f848c8 100644 --- a/ctype/Versions +++ b/ctype/Versions @@ -1,7 +1,8 @@ libc { GLIBC_2.0 { # global variables - __ctype_b; __ctype_tolower; __ctype_toupper; _tolower; _toupper; + __ctype_b; __ctype32_b; __ctype_tolower; __ctype_toupper; + _tolower; _toupper; # i* isalnum; isalpha; isascii; isblank; iscntrl; isdigit; isgraph; islower; diff --git a/iconv/gconv.h b/iconv/gconv.h index 3f787c5e1c..66c34aa928 100644 --- a/iconv/gconv.h +++ b/iconv/gconv.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -69,7 +69,7 @@ typedef void (*gconv_end_fct) __PMT ((struct gconv_step *)); struct gconv_step { struct gconv_loaded_object *shlib_handle; - const char *modname; + __const char *modname; int counter; @@ -104,7 +104,7 @@ struct gconv_step_data int is_last; /* Counter for number of invocations of the module function for this - desriptor. */ + descriptor. */ int invocation_counter; /* Flag whether this is an internal use of the module (in the mb*towc* diff --git a/iconv/skeleton.c b/iconv/skeleton.c index 4ed16d6e68..c124eb1e07 100644 --- a/iconv/skeleton.c +++ b/iconv/skeleton.c @@ -1,5 +1,5 @@ /* Skeleton for a conversion module. - Copyright (C) 1998 Free Software Foundation, Inc. + Copyright (C) 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -119,7 +119,7 @@ static int to_object; character set we we can define RESET_INPUT_BUFFER is necessary. */ #if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE # if MIN_NEEDED_FROM == MAX_NEEDED_FROM && MIN_NEEDED_TO == MAX_NEEDED_TO -/* We have to used these `if's here since the compiler cannot know that +/* We have to use these `if's here since the compiler cannot know that (outbuf - outerr) is always divisible by MIN_NEEDED_TO. */ # define RESET_INPUT_BUFFER \ if (MIN_NEEDED_FROM % MIN_NEEDED_TO == 0) \ @@ -144,26 +144,25 @@ gconv_init (struct gconv_step *step) { /* Determine which direction. 
*/ if (__strcasecmp (step->from_name, CHARSET_NAME) == 0) - step->data = &from_object; - else if (__strcasecmp (step->to_name, CHARSET_NAME) == 0) - step->data = &to_object; - else - return GCONV_NOCONV; - - if (step->data == &from_object) { + step->data = &from_object; + step->min_needed_from = MIN_NEEDED_FROM; step->max_needed_from = MAX_NEEDED_FROM; step->min_needed_to = MIN_NEEDED_TO; step->max_needed_to = MAX_NEEDED_TO; } - else + else if (__strcasecmp (step->to_name, CHARSET_NAME) == 0) { + step->data = &to_object; + step->min_needed_from = MIN_NEEDED_TO; step->max_needed_from = MAX_NEEDED_TO; step->min_needed_to = MIN_NEEDED_FROM; step->max_needed_to = MAX_NEEDED_FROM; } + else + return GCONV_NOCONV; #ifdef RESET_STATE step->stateful = 1; @@ -210,22 +209,17 @@ FUNCTION_NAME (struct gconv_step *step, struct gconv_step_data *data, dropped. */ if (do_flush) { - /* Call the steps down the chain if there are any. */ - if (data->is_last) - status = GCONV_OK; - else - { -#ifdef EMIT_SHIFT_TO_INIT - status = GCONV_OK; + status = GCONV_OK; - EMIT_SHIFT_TO_INIT; - - if (status == GCONV_OK) +#ifdef EMIT_SHIFT_TO_INIT + /* Emit the escape sequence to reset the state. */ + EMIT_SHIFT_TO_INIT; #endif - /* Give the modules below the same chance. */ - status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, - written, 1)); - } + /* Call the steps down the chain if there are any but only if we + successfully emitted the escape sequence. */ + if (status == GCONV_OK && ! data->is_last) + status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, + written, 1)); } else { @@ -271,7 +265,7 @@ FUNCTION_NAME (struct gconv_step *step, struct gconv_step_data *data, data->statep, step->data, &converted EXTRA_LOOP_ARGS); - /* If this is the last step leave the loop, there is nothgin + /* If this is the last step leave the loop, there is nothing we can do. 
*/ if (data->is_last) { diff --git a/iconvdata/iso-2022-jp.c b/iconvdata/iso-2022-jp.c index 36465ccd45..a7ec09b32d 100644 --- a/iconvdata/iso-2022-jp.c +++ b/iconvdata/iso-2022-jp.c @@ -1,5 +1,5 @@ /* Conversion module for ISO-2022-JP. - Copyright (C) 1998 Free Software Foundation, Inc. + Copyright (C) 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -149,14 +149,14 @@ gconv_init (struct gconv_step *step) step->min_needed_from = MIN_NEEDED_FROM; step->max_needed_from = MAX_NEEDED_FROM; step->min_needed_to = MIN_NEEDED_TO; - step->max_needed_to = MIN_NEEDED_TO; + step->max_needed_to = MAX_NEEDED_TO; } else { step->min_needed_from = MIN_NEEDED_TO; step->max_needed_from = MAX_NEEDED_TO; step->min_needed_to = MIN_NEEDED_FROM; - step->max_needed_to = MIN_NEEDED_FROM + 2; + step->max_needed_to = MAX_NEEDED_FROM + 2; } /* Yes, this is a stateful encoding. */ diff --git a/include/wctype.h b/include/wctype.h index c76f50c866..f93ec64abc 100644 --- a/include/wctype.h +++ b/include/wctype.h @@ -1 +1,7 @@ +#ifndef _WCTYPE_H + #include <wctype/wctype.h> + +extern int __iswspace __P ((wint_t __wc)); + +#endif @@ -231,6 +231,7 @@ fts_close(sp) { register FTSENT *freep, *p; int saved_errno; + int retval = 0; /* * This still works if we haven't read anything -- the dummy structure @@ -259,15 +260,16 @@ fts_close(sp) (void)__close(sp->fts_rfd); } - /* Free up the stream pointer. */ - free(sp); - /* Set errno and return. */ if (!ISSET(FTS_NOCHDIR) && saved_errno) { __set_errno (saved_errno); - return (-1); + retval = -1; } - return (0); + + /* Free up the stream pointer. 
*/ + free (sp); + + return retval; } /* diff --git a/manual/Makefile b/manual/Makefile index e0dad4792c..8eb4d5b69e 100644 --- a/manual/Makefile +++ b/manual/Makefile @@ -49,7 +49,7 @@ endif mkinstalldirs = $(..)scripts/mkinstalldirs chapters = $(addsuffix .texi, \ - intro errno memory ctype string mbyte locale \ + intro errno memory ctype string charset locale \ message search pattern io stdio llio filesys \ pipe socket terminal math arith time setjmp \ signal startup process job nss users sysinfo conf) @@ -74,7 +74,7 @@ libc.dvi: texinfo.tex # Generate the summary from the Texinfo source files for each chapter. summary.texi: stamp-summary ; stamp-summary: summary.awk $(filter-out summary.texi, $(texis)) - $(AWK) -f $^ | sort -t'^L' -df +0 -1 | tr '\014' '\012' > summary-tmp + $(AWK) -f $^ | sort -t'' -df +0 -1 | tr '\014' '\012' > summary-tmp $(move-if-change) summary-tmp summary.texi touch $@ diff --git a/manual/chapters.texi b/manual/chapters.texi index a5a8a57903..bf7c4c01e0 100644 --- a/manual/chapters.texi +++ b/manual/chapters.texi @@ -3,7 +3,7 @@ @include memory.texi @include ctype.texi @include string.texi -@include mbyte.texi +@include charset.texi @include locale.texi @include message.texi @include search.texi @@ -27,6 +27,7 @@ @include users.texi @include sysinfo.texi @include conf.texi +@include ../crypt/crypt.texi @include ../linuxthreads/linuxthreads.texi @include lang.texi @include header.texi diff --git a/manual/charset.texi b/manual/charset.texi new file mode 100644 index 0000000000..6179128e3c --- /dev/null +++ b/manual/charset.texi @@ -0,0 +1,2846 @@ +@node Character Set Handling, Locales, String and Array Utilities, Top +@c %MENU% Support for extended character sets +@chapter Character Set Handling + +@ifnottex +@macro cal{text} +\text\ +@end macro +@end ifnottex + +Character sets used in the early days of computers had only six, seven, +or eight bits for each character. 
 In no case were more bits used than would fit +into one byte, which nowadays is almost exclusively @w{8 bits} wide. +This of course leads to several problems once not all characters needed +at one time can be represented by the up to 256 available characters. +This chapter shows the functionality which was added to the C library to +overcome this problem. + +@menu +* Extended Char Intro:: Introduction to Extended Characters. +* Charset Function Overview:: Overview of Character Handling + Functions. +* Restartable multibyte conversion:: Restartable multibyte conversion + Functions. +* Non-reentrant Conversion:: Non-reentrant Conversion Function. +* Generic Charset Conversion:: Generic Charset Conversion. +@end menu + + +@node Extended Char Intro +@section Introduction to Extended Characters + +To overcome the limitations of character sets with a 1:1 relation +between bytes and characters people came up with a variety of solutions. +The remainder of this section gives a few examples to help understand +the design decisions made while developing the functionality of the @w{C +library} to support them. + +@cindex internal representation +A distinction we have to make right away is between internal and +external representation. @dfn{Internal representation} means the +representation used by a program while keeping the text in memory. +External representations are used when text is stored or transmitted +through whatever communication channel. + +Traditionally there was no difference between the two representations. +It was equally comfortable and useful to use the same one-byte +representation internally and externally. This changes with more and +larger character sets. + +One of the problems to overcome with the internal representation is +handling texts which were externally encoded using different character +sets. Assume a program which reads two texts and compares them using +some metric.
 The comparison can be usefully done only if the texts are +internally kept in a common format. + +@cindex wide character +For such a common format (@math{=} character set) eight bits are certainly +not enough anymore. So the smallest entity will have to grow: @dfn{wide +characters} will be used. Here instead of one byte one uses two or four +(three are not good to address in memory and more than four bytes seem +not to be necessary). + +@cindex Unicode +@cindex ISO 10646 +As shown in some other part of this manual +@c !!! Ahem, wide char string functions are not yet covered -- drepper +there exists a completely new family of functions which can handle texts +of this kind in memory. The most commonly used character sets for such +internal wide character representations are Unicode and @w{ISO 10646}. +The former is a subset of the latter and is used when wide characters are +chosen to be 2 bytes (@math{= 16} bits) wide. The standard names of the +@cindex UCS2 +@cindex UCS4 +encodings used in these cases are UCS2 (@math{= 16} bits) and UCS4 +(@math{= 32} bits). + +To represent wide characters the @code{char} type is certainly not +suitable. For this reason the @w{ISO C} standard introduces a new type +which is designed to keep one character of a wide character string. To +maintain the similarity there is also a type corresponding to @code{int} +for those functions which take a single wide character. + +@comment stddef.h +@comment ISO +@deftp {Data type} wchar_t +This data type is used as the base type for wide character strings. +I.e., arrays of objects of this type are the equivalent of @code{char[]} +for multibyte character strings. The type is defined in @file{stddef.h}. + +The @w{ISO C89} standard, where this type was introduced, does not say +anything specific about the representation. It only requires that this +type is capable of storing all elements of the basic character set. +Therefore it would be legitimate to define @code{wchar_t} as +@code{char}.
This might make sense for embedded systems. + +But for GNU systems this type is always 32 bits wide. It is the |
