diff options
| author | Ulrich Drepper <drepper@redhat.com> | 2001-02-05 05:57:24 +0000 |
|---|---|---|
| committer | Ulrich Drepper <drepper@redhat.com> | 2001-02-05 05:57:24 +0000 |
| commit | 93693c4d820dac2f218e144f5126a5a761f1cfbf (patch) | |
| tree | 7371d326f24a3c9d73fed75e7792133d0d654a50 | |
| parent | 377c725f8e2dba91af36af27206e6deb85cf6e6f (diff) | |
| download | glibc-93693c4d820dac2f218e144f5126a5a761f1cfbf.tar.xz glibc-93693c4d820dac2f218e144f5126a5a761f1cfbf.zip | |
Update.
2001-02-04 Ulrich Drepper <drepper@redhat.com>
* iconv/Makefile (iconv_prog-modules): Define. Add vpath to find
files in locale/programs. Add CFLAGS definition to allow compiling
localedef files.
* iconv/dummy-repertoire.c: New file.
* iconv/iconv_charmap.c: New file.
* iconv/iconv_prog.h: New file.
* iconv/iconv_prog.c: Make verbose and omit_invalid global.
(main): If the parameters for -f and -t contain slashes, try first to resolve
the strings as filenames of charmap files. Use them for conversion
in this case.
* iconvdata/run-iconv-test.sh: If charmaps exist also run tests with
iconv getting charmap names as parameters.
* locale/programs/linereader.c (lr_token): Take extra parameter
verbose and pass it to get_string.
(get_string): Take extra parameter verbose.
* locale/programs/charmap.c (parse_charmap): Take extra parameters
verbose and be_quiet. Change all callers of lr_token and
parse_charmap.
* locale/programs/charmap.h: Likewise.
* locale/programs/ld-address.c: Likewise.
* locale/programs/ld-collate.c: Likewise.
* locale/programs/ld-ctype.c: Likewise.
* locale/programs/ld-identification.c: Likewise.
* locale/programs/ld-measurement.c: Likewise.
* locale/programs/ld-messages.c: Likewise.
* locale/programs/ld-monetary.c: Likewise.
* locale/programs/ld-name.c: Likewise.
* locale/programs/ld-numeric.c: Likewise.
* locale/programs/ld-paper.c: Likewise.
* locale/programs/ld-telephone.c: Likewise.
* locale/programs/ld-time.c: Likewise.
* locale/programs/linereader.c: Likewise.
* locale/programs/linereader.h: Likewise.
* locale/programs/localedef.c: Likewise.
* locale/programs/locfile.c: Likewise.
* locale/programs/locfile.h: Likewise.
* locale/programs/repertoire.c: Likewise.
27 files changed, 1058 insertions, 304 deletions
@@ -1,3 +1,43 @@ +2001-02-04 Ulrich Drepper <drepper@redhat.com> + + * iconv/Makefile (iconv_prog-modules): Define. Add vpath to find + files in locale/programs. Add CFLAGS definition to allow compiling + localedef files. + * iconv/dummy-repertoire.c: New file. + * iconv/iconv_charmap.c: New file. + * iconv/iconv_prog.h: New file. + * iconv/iconv_prog.c: Make verbose and omit_invalid global. + (main): If parameter for -f and -t contain slashes try first to resolve + the strings as filenames of charmap files. Use them for conversion + in this case. + * iconvdata/run-iconv-test.sh: If charmaps exist also run tests with + iconv getting charmap names as parameters. + * locale/programs/linereader.c (lr_token): Take extra parameters + verbose and pass it to get_string. + (get_string): Take extra parameters verbose. + * locale/programs/charmap.c (parse_charmap): Take extra parameters + verbose and be_quiet. Change all callers of lr_token and + parse_charmap. + * locale/programs/charmap.h: Likewise. + * locale/programs/ld-address.c: Likewise. + * locale/programs/ld-collate.c: Likewise. + * locale/programs/ld-ctype.c: Likewise. + * locale/programs/ld-identification.c: Likewise. + * locale/programs/ld-measurement.c: Likewise. + * locale/programs/ld-messages.c: Likewise. + * locale/programs/ld-monetary.c: Likewise. + * locale/programs/ld-name.c: Likewise. + * locale/programs/ld-numeric.c: Likewise. + * locale/programs/ld-paper.c: Likewise. + * locale/programs/ld-telephone.c: Likewise. + * locale/programs/ld-time.c: Likewise. + * locale/programs/linereader.c: Likewise. + * locale/programs/linereader.h: Likewise. + * locale/programs/localedef.c: Likewise. + * locale/programs/locfile.c: Likewise. + * locale/programs/locfile.h: Likewise. + * locale/programs/repertoire.c: Likewise. 
+ 2001-02-03 Ulrich Drepper <drepper@redhat.com> * iconv/iconv_prog.c (main): If output file name is "-" write to diff --git a/iconv/Makefile b/iconv/Makefile index b6c4f23d5a..e92eb0bb39 100644 --- a/iconv/Makefile +++ b/iconv/Makefile @@ -34,9 +34,19 @@ CFLAGS-gconv_db.c = -DSTATIC_GCONV CFLAGS-gconv_simple.c = -DSTATIC_GCONV endif +vpath %.c ../locale/programs + +iconv_prog-modules = iconv_charmap charmap charmap-dir linereader \ + dummy-repertoire simple-hash xstrdup xmalloc +CFLAGS-iconv_prog.c = -I../locale/programs +CFLAGS-iconv_charmap.c = -I../locale/programs +CFLAGS-dummy-repertoire.c = -I../locale/programs +CFLAGS-charmap.c = -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \ + -DDEFAULT_CHARMAP=null_pointer + tests = tst-iconv1 tst-iconv2 tst-iconv3 -distribute = gconv_builtin.h gconv_int.h loop.c skeleton.c +distribute = gconv_builtin.h gconv_int.h loop.c skeleton.c iconv_prog.h others = iconv_prog install-others = $(inst_bindir)/iconv @@ -47,3 +57,5 @@ include ../Rules $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) $(do-install-program) + +$(objpfx)iconv_prog: $(iconv_prog-modules:%=$(objpfx)%.o) diff --git a/iconv/dummy-repertoire.c b/iconv/dummy-repertoire.c new file mode 100644 index 0000000000..d3b455eee8 --- /dev/null +++ b/iconv/dummy-repertoire.c @@ -0,0 +1,37 @@ +/* Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* For iconv we don't have to handle repertoire maps. Provide dummy + definitions to allow the use of linereader.c unchanged. */ +#include <repertoire.h> + + +uint32_t +repertoire_find_value (const struct repertoire_t *repertoire, const char *name, + size_t len) +{ + return ILLEGAL_CHAR_VALUE; +} + + +const char * +repertoire_find_symbol (const struct repertoire_t *repertoire, uint32_t ucs) +{ + return NULL; +} diff --git a/iconv/iconv_charmap.c b/iconv/iconv_charmap.c new file mode 100644 index 0000000000..4b72b1bad2 --- /dev/null +++ b/iconv/iconv_charmap.c @@ -0,0 +1,563 @@ +/* Convert using charmaps and possibly iconv(). + Copyright (C) 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <iconv.h> +#include <libintl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "iconv_prog.h" + + +/* Prototypes for a few program-wide used functions. */ +extern void *xmalloc (size_t __n); +extern void *xcalloc (size_t __n, size_t __s); + + +struct convtable +{ + int term[256 / 8]; + union + { + struct convtable *sub; + struct charseq *out; + } val[256]; +}; + + +static inline struct convtable * +allocate_table (void) +{ + return (struct convtable *) xcalloc (1, sizeof (struct convtable)); +} + + +static inline int +is_term (struct convtable *tbl, unsigned int idx) +{ + return tbl->term[idx / 8] & (1 << (idx % 8)); +} + + +static inline void +clear_term (struct convtable *tbl, unsigned int idx) +{ + tbl->term[idx / 8] &= ~(1 << (idx % 8)); +} + + +static inline void +set_term (struct convtable *tbl, unsigned int idx) +{ + tbl->term[idx / 8] |= 1 << (idx % 8); +} + + +/* Generate the conversion table. */ +static struct convtable *use_from_charmap (struct charmap_t *from_charmap, + const char *to_code); +static struct convtable *use_to_charmap (const char *from_code, + struct charmap_t *to_charmap); +static struct convtable *use_both_charmaps (struct charmap_t *from_charmap, + struct charmap_t *to_charmap); + +/* Prototypes for the functions doing the actual work. 
*/ +static int process_block (struct convtable *tbl, char *addr, size_t len, + FILE *output); +static int process_fd (struct convtable *tbl, int fd, FILE *output); +static int process_file (struct convtable *tbl, FILE *input, FILE *output); + + +int +charmap_conversion (const char *from_code, struct charmap_t *from_charmap, + const char *to_code, struct charmap_t *to_charmap, + int argc, int remaining, char *argv[], FILE *output) +{ + struct convtable *cvtbl; + int status = EXIT_SUCCESS; + + /* We have three different cases to handle: + + - both, from_charmap and to_charmap, are available. This means we + can assume that the symbolic names match and use them to create + the mapping. + + - only from_charmap is available. In this case we can only hope that + the symbolic names used are of the <Uxxxx> form in which case we + can use a UCS4->"to_code" iconv() conversion for the second step. + + - only to_charmap is available. This is similar, only that we would + use iconv() for the "to_code"->UCS4 conversion. + + We first create a table which maps input bytes into output bytes. + Once this is done we can handle all three of the cases above + equally. */ + if (from_charmap != NULL) + { + if (to_charmap == NULL) + cvtbl = use_from_charmap (from_charmap, to_code); + else + cvtbl = use_both_charmaps (from_charmap, to_charmap); + } + else + { + assert (to_charmap != NULL); + cvtbl = use_to_charmap (from_code, to_charmap); + } + + /* If we couldn't generate a table stop now. */ + if (cvtbl == NULL) + return EXIT_FAILURE; + + /* We can now start the conversion. 
*/ + if (remaining == argc) + { + if (process_file (cvtbl, stdin, output) != 0) + status = EXIT_FAILURE; + } + else + do + { + struct stat st; + char *addr; + int fd; + + if (verbose) + printf ("%s:\n", argv[remaining]); + if (strcmp (argv[remaining], "-") == 0) + fd = 0; + else + { + fd = open (argv[remaining], O_RDONLY); + + if (fd == -1) + { + error (0, errno, _("cannot open input file `%s'"), + argv[remaining]); + status = EXIT_FAILURE; + continue; + } + } + +#ifdef _POSIX_MAPPED_FILES + /* We have possibilities for reading the input file. First try + to mmap() it since this will provide the fastest solution. */ + if (fstat (fd, &st) == 0 + && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, + fd, 0)) != MAP_FAILED)) + { + /* Yes, we can use mmap(). The descriptor is not needed + anymore. */ + if (close (fd) != 0) + error (EXIT_FAILURE, errno, + _("error while closing input `%s'"), argv[remaining]); + + if (process_block (cvtbl, addr, st.st_size, output) < 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* We don't need the input data anymore. */ + munmap ((void *) addr, st.st_size); + } + else +#endif /* _POSIX_MAPPED_FILES */ + { + /* Read the file in pieces. */ + if (process_fd (cvtbl, fd, output) != 0) + { + /* Something went wrong. */ + status = EXIT_FAILURE; + + /* We don't need the input file anymore. */ + close (fd); + + /* We cannot go on with producing output since it might + lead to problem because the last output might leave + the output stream in an undefined state. */ + break; + } + + /* Now close the file. */ + close (fd); + } + } + while (++remaining < argc); + + /* All done. 
*/ + return status; +} + + +static void +add_bytes (struct convtable *tbl, struct charseq *in, struct charseq *out) +{ + int n = 0; + unsigned int byte; + + assert (in->nbytes > 0); + + byte = ((unsigned char *) in->bytes)[n]; + while (n + 1 < in->nbytes) + { + if (is_term (tbl, byte) || tbl->val[byte].sub == NULL) + { + /* Note that we simply ignore a definition for a byte sequence + which is also the prefix for a longer one. */ + clear_term (tbl, byte); + tbl->val[byte].sub = + (struct convtable *) xcalloc (1, sizeof (struct convtable)); + } + + tbl = tbl->val[byte].sub; + + byte = ((unsigned char *) in->bytes)[++n]; + } + + /* Only add the new sequence if there is none yet and the byte sequence + is not part of an even longer one. */ + if (! is_term (tbl, byte) && tbl->val[byte].sub == NULL) + { + set_term (tbl, byte); + tbl->val[byte].out = out; + } +} + + +static struct convtable * +use_from_charmap (struct charmap_t *from_charmap, const char *to_code) +{ + /* We iterate over all entries in the from_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the to_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + cd = iconv_open (to_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. */ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *in = (struct charseq *) data; + + if (in->ucs4 != UNINITIALIZED_CHAR_VALUE) + { + /* There is a chance. Try the iconv module. */ + wchar_t inbuf[1] = { in->ucs4 }; + unsigned char outbuf[64]; + char *inptr = (char *) inbuf; + size_t inlen = sizeof (inbuf); + char *outptr = (char *) outbuf; + size_t outlen = sizeof (outbuf); + + (void) iconv (cd, &inptr, &inlen, &outptr, &outlen); + + if (outptr != (char *) outbuf) + { + /* We got some output. Good, use it. 
*/ + struct charseq *newp; + + outlen = sizeof (outbuf) - outlen; + assert ((char *) outbuf + outlen == outptr); + + newp = (struct charseq *) xmalloc (sizeof (struct charseq) + + outlen); + newp->name = in->name; + newp->ucs4 = in->ucs4; + newp->nbytes = outlen; + memcpy (newp->bytes, outbuf, outlen); + + add_bytes (rettbl, in, newp); + } + + /* Clear any possible state left behind. */ + (void) iconv (cd, NULL, NULL, NULL, NULL); + } + } + + iconv_close (cd); + + return rettbl; +} + + +static struct convtable * +use_to_charmap (const char *from_code, struct charmap_t *to_charmap) +{ + /* We iterate over all entries in the to_charmap and for those which + have a known UCS4 representation we use an iconv() call to determine + the mapping to the from_code charset. */ + struct convtable *rettbl; + iconv_t cd; + void *ptr = NULL; + const void *key; + size_t keylen; + void *data; + + /* Note that the conversion we use here is the reverse direction. Without + exhaustive search we cannot figure out which input yields the UCS4 + character we are looking for. Therefore we determine it the other + way round. */ + cd = iconv_open (from_code, "WCHAR_T"); + if (cd == (iconv_t) -1) + /* We cannot do anything. */ + return NULL; + + rettbl = allocate_table (); + + while (iterate_table (&to_charmap->char_table, &ptr, &key, &keylen, &data) + >= 0) + { + struct charseq *out = (struct charseq *) data; + + if (out->ucs4 != UNINITIALIZED_CHAR_VALUE) + { + /* There is a chance. Try the iconv module. */ + wchar_t inbuf[1] = { out->ucs4 }; + unsigned char outbuf[64]; + char *inptr = (char *) inbuf; + size_t inlen = sizeof (inbuf); + char *outptr = (char *) outbuf; + size_t outlen = sizeof (outbuf); + + (void) iconv (cd, &inptr, &inlen, &outptr, &outlen); + + if (outptr != (char *) outbuf) + { + /* We got some output. Good, use it. 
*/ + struct charseq *newp; + + outlen = sizeof (outbuf) - outlen; + assert ((char *) outbuf + outlen == outptr); + + newp = (struct charseq *) xmalloc (sizeof (struct cha |
