From 8f5ca04bc7fd53741d80117df992995ace8f6d2d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 16 Oct 1995 01:37:51 +0000 Subject: Sat Oct 14 02:52:36 1995 Ulrich Drepper * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include . * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. Sat Oct 14 02:52:36 1995 Ulrich Drepper * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include . * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. 
* sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. 
--- .cvsignore | 2 +- ChangeLog | 40 ++++ configure.in | 28 +-- hurd/Makefile | 4 +- hurd/hurd.h | 69 ++---- hurd/hurdinit.c | 6 + stdio/_itoa.c | 401 +++++++++++++++++++++++++++++++++- stdio/_itoa.h | 26 --- stdio/test_rdwr.c | 1 + stdlib/gmp-impl.h | 66 ++++-- stdlib/gmp.h | 40 ++-- stdlib/longlong.h | 229 +++++++++++-------- sysdeps/alpha/add_n.s | 119 ++++++++++ sysdeps/alpha/addmul_1.s | 100 +++++++++ sysdeps/alpha/alphaev5/add_n.s | 118 ++++++++++ sysdeps/alpha/alphaev5/lshift.s | 175 +++++++++++++++ sysdeps/alpha/alphaev5/rshift.s | 173 +++++++++++++++ sysdeps/alpha/lshift.s | 108 +++++++++ sysdeps/alpha/mul_1.s | 84 +++++++ sysdeps/alpha/rshift.s | 106 +++++++++ sysdeps/alpha/sub_n.s | 119 ++++++++++ sysdeps/alpha/submul_1.s | 100 +++++++++ sysdeps/alpha/udiv_qrnnd.S | 2 +- sysdeps/generic/divmod_1.c | 6 +- sysdeps/generic/mod_1.c | 2 - sysdeps/hppa/add_n.s | 57 +++++ sysdeps/hppa/hppa1.1/addmul_1.s | 101 +++++++++ sysdeps/hppa/hppa1.1/mul_1.s | 97 ++++++++ sysdeps/hppa/hppa1.1/submul_1.s | 110 ++++++++++ sysdeps/hppa/hppa1.1/udiv_qrnnd.s | 74 +++++++ sysdeps/hppa/lshift.s | 65 ++++++ sysdeps/hppa/rshift.s | 62 ++++++ sysdeps/hppa/sub_n.s | 58 +++++ sysdeps/hppa/udiv_qrnnd.s | 285 ++++++++++++++++++++++++ sysdeps/i386/add_n.S | 18 +- sysdeps/i386/gmp-mparam.h | 28 +++ sysdeps/i386/i486/strcat.S | 260 ++++++++++++++++++++++ sysdeps/i386/i486/strlen.S | 132 +++++++++++ sysdeps/i386/i586/Implies | 2 + sysdeps/i386/i586/add_n.S | 136 ++++++++++++ sysdeps/i386/i586/addmul_1.S | 84 +++++++ sysdeps/i386/i586/lshift.S | 213 ++++++++++++++++++ sysdeps/i386/i586/memcopy.h | 6 +- sysdeps/i386/i586/mul_1.S | 78 +++++++ sysdeps/i386/i586/rshift.S | 213 ++++++++++++++++++ sysdeps/i386/i586/strchr.S | 334 ++++++++++++++++++++++++++++ sysdeps/i386/i586/strlen.S | 185 ++++++++++++++++ sysdeps/i386/i586/sub_n.S | 136 ++++++++++++ sysdeps/i386/i586/submul_1.S | 82 +++++++ sysdeps/i386/memchr.S | 315 ++++++++++++++++++++++++++ sysdeps/i386/memchr.c | 48 ---- 
sysdeps/i386/memcmp.S | 68 ++++++ sysdeps/i386/stpcpy.S | 87 ++++++++ sysdeps/i386/stpncpy.S | 143 ++++++++++++ sysdeps/i386/strchr.S | 278 +++++++++++++++++++++++ sysdeps/i386/strcspn.S | 176 +++++++++++++++ sysdeps/i386/strpbrk.S | 177 +++++++++++++++ sysdeps/i386/strrchr.S | 321 +++++++++++++++++++++++++++ sysdeps/i386/strspn.S | 176 +++++++++++++++ sysdeps/i386/sub_n.S | 26 ++- sysdeps/i960/add_n.s | 21 ++ sysdeps/i960/addmul_1.s | 26 +++ sysdeps/i960/mul_1.s | 23 ++ sysdeps/i960/sub_n.s | 21 ++ sysdeps/m88k/m88100/add_n.s | 103 +++++++++ sysdeps/m88k/m88100/mul_1.s | 128 +++++++++++ sysdeps/m88k/m88100/sub_n.s | 104 +++++++++ sysdeps/m88k/m88110/mul_1.s | 84 +++++++ sysdeps/mips/add_n.s | 119 ++++++++++ sysdeps/mips/addmul_1.s | 96 ++++++++ sysdeps/mips/lshift.s | 94 ++++++++ sysdeps/mips/mips3/add_n.s | 119 ++++++++++ sysdeps/mips/mips3/addmul_1.s | 96 ++++++++ sysdeps/mips/mips3/gmp-mparam.h | 26 +++ sysdeps/mips/mips3/lshift.s | 94 ++++++++ sysdeps/mips/mips3/mul_1.s | 84 +++++++ sysdeps/mips/mips3/rshift.s | 91 ++++++++ sysdeps/mips/mips3/sub_n.s | 119 ++++++++++ sysdeps/mips/mips3/submul_1.s | 96 ++++++++ sysdeps/mips/mul_1.s | 84 +++++++ sysdeps/mips/rshift.s | 91 ++++++++ sysdeps/mips/sub_n.s | 119 ++++++++++ sysdeps/mips/submul_1.s | 96 ++++++++ sysdeps/rs6000/add_n.s | 54 +++++ sysdeps/rs6000/addmul_1.s | 122 +++++++++++ sysdeps/rs6000/lshift.s | 58 +++++ sysdeps/rs6000/mul_1.s | 109 +++++++++ sysdeps/rs6000/rshift.s | 56 +++++ sysdeps/rs6000/sub_n.s | 55 +++++ sysdeps/rs6000/submul_1.s | 127 +++++++++++ sysdeps/sparc/add_n.S | 15 +- sysdeps/sparc/sparc8/addmul_1.S | 7 + sysdeps/sparc/sparc8/mul_1.S | 7 + sysdeps/sparc/sub_n.S | 15 +- sysdeps/unix/sysv/linux/Dist | 1 + sysdeps/unix/sysv/linux/Makefile | 6 +- sysdeps/unix/sysv/linux/i386/sysdep.h | 76 ++++--- sysdeps/unix/sysv/linux/local_lim.h | 17 +- sysdeps/unix/sysv/linux/nfs/nfs.h | 1 + sysdeps/unix/sysv/linux/sys/param.h | 31 ++- sysdeps/vax/add_n.s | 47 ++++ sysdeps/vax/addmul_1.s | 125 
+++++++++++ sysdeps/vax/gmp-mparam.h | 28 +++ sysdeps/vax/mul_1.s | 122 +++++++++++ sysdeps/vax/sub_n.s | 47 ++++ sysdeps/vax/submul_1.s | 125 +++++++++++ sysdeps/z8000/add_n.s | 52 +++++ sysdeps/z8000/mul_1.s | 67 ++++++ sysdeps/z8000/sub_n.s | 53 +++++ 109 files changed, 9753 insertions(+), 359 deletions(-) create mode 100644 sysdeps/alpha/add_n.s create mode 100644 sysdeps/alpha/addmul_1.s create mode 100644 sysdeps/alpha/alphaev5/add_n.s create mode 100644 sysdeps/alpha/alphaev5/lshift.s create mode 100644 sysdeps/alpha/alphaev5/rshift.s create mode 100644 sysdeps/alpha/lshift.s create mode 100644 sysdeps/alpha/mul_1.s create mode 100644 sysdeps/alpha/rshift.s create mode 100644 sysdeps/alpha/sub_n.s create mode 100644 sysdeps/alpha/submul_1.s create mode 100644 sysdeps/hppa/add_n.s create mode 100644 sysdeps/hppa/hppa1.1/addmul_1.s create mode 100644 sysdeps/hppa/hppa1.1/mul_1.s create mode 100644 sysdeps/hppa/hppa1.1/submul_1.s create mode 100644 sysdeps/hppa/hppa1.1/udiv_qrnnd.s create mode 100644 sysdeps/hppa/lshift.s create mode 100644 sysdeps/hppa/rshift.s create mode 100644 sysdeps/hppa/sub_n.s create mode 100644 sysdeps/hppa/udiv_qrnnd.s create mode 100644 sysdeps/i386/gmp-mparam.h create mode 100644 sysdeps/i386/i486/strcat.S create mode 100644 sysdeps/i386/i486/strlen.S create mode 100644 sysdeps/i386/i586/Implies create mode 100644 sysdeps/i386/i586/add_n.S create mode 100644 sysdeps/i386/i586/addmul_1.S create mode 100644 sysdeps/i386/i586/lshift.S create mode 100644 sysdeps/i386/i586/mul_1.S create mode 100644 sysdeps/i386/i586/rshift.S create mode 100644 sysdeps/i386/i586/strchr.S create mode 100644 sysdeps/i386/i586/strlen.S create mode 100644 sysdeps/i386/i586/sub_n.S create mode 100644 sysdeps/i386/i586/submul_1.S create mode 100644 sysdeps/i386/memchr.S delete mode 100644 sysdeps/i386/memchr.c create mode 100644 sysdeps/i386/memcmp.S create mode 100644 sysdeps/i386/stpcpy.S create mode 100644 sysdeps/i386/stpncpy.S create mode 100644 
sysdeps/i386/strchr.S create mode 100644 sysdeps/i386/strcspn.S create mode 100644 sysdeps/i386/strpbrk.S create mode 100644 sysdeps/i386/strrchr.S create mode 100644 sysdeps/i386/strspn.S create mode 100644 sysdeps/i960/add_n.s create mode 100644 sysdeps/i960/addmul_1.s create mode 100644 sysdeps/i960/mul_1.s create mode 100644 sysdeps/i960/sub_n.s create mode 100644 sysdeps/m88k/m88100/add_n.s create mode 100644 sysdeps/m88k/m88100/mul_1.s create mode 100644 sysdeps/m88k/m88100/sub_n.s create mode 100644 sysdeps/m88k/m88110/mul_1.s create mode 100644 sysdeps/mips/add_n.s create mode 100644 sysdeps/mips/addmul_1.s create mode 100644 sysdeps/mips/lshift.s create mode 100644 sysdeps/mips/mips3/add_n.s create mode 100644 sysdeps/mips/mips3/addmul_1.s create mode 100644 sysdeps/mips/mips3/gmp-mparam.h create mode 100644 sysdeps/mips/mips3/lshift.s create mode 100644 sysdeps/mips/mips3/mul_1.s create mode 100644 sysdeps/mips/mips3/rshift.s create mode 100644 sysdeps/mips/mips3/sub_n.s create mode 100644 sysdeps/mips/mips3/submul_1.s create mode 100644 sysdeps/mips/mul_1.s create mode 100644 sysdeps/mips/rshift.s create mode 100644 sysdeps/mips/sub_n.s create mode 100644 sysdeps/mips/submul_1.s create mode 100644 sysdeps/rs6000/add_n.s create mode 100644 sysdeps/rs6000/addmul_1.s create mode 100644 sysdeps/rs6000/lshift.s create mode 100644 sysdeps/rs6000/mul_1.s create mode 100644 sysdeps/rs6000/rshift.s create mode 100644 sysdeps/rs6000/sub_n.s create mode 100644 sysdeps/rs6000/submul_1.s create mode 100644 sysdeps/unix/sysv/linux/nfs/nfs.h create mode 100644 sysdeps/vax/add_n.s create mode 100644 sysdeps/vax/addmul_1.s create mode 100644 sysdeps/vax/gmp-mparam.h create mode 100644 sysdeps/vax/mul_1.s create mode 100644 sysdeps/vax/sub_n.s create mode 100644 sysdeps/vax/submul_1.s create mode 100644 sysdeps/z8000/add_n.s create mode 100644 sysdeps/z8000/mul_1.s create mode 100644 sysdeps/z8000/sub_n.s diff --git a/.cvsignore b/.cvsignore index cf479d2b3b..ff3f67f646 
100644 --- a/.cvsignore +++ b/.cvsignore @@ -5,7 +5,7 @@ glibc-* configparms -sun4 i386 i386-gnuelf hp300-netbsd hp300 i486-linux +sun[43]* i[345]86* hp300* ieeetest hppa-sysdeps regex diff --git a/ChangeLog b/ChangeLog index 618dd3e438..d8a781c0f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,43 @@ +Sat Oct 14 02:52:36 1995 Ulrich Drepper + + * malloc/malloc.c (_malloc_internal): Performance fix. Move + if statement out of loop. + + * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster + implementation using GMP functions. Contributed by + Torbjorn Granlund and Ulrich Drepper. + + * stdio/test_rdwr.c: Include . + + * sysdeps/i386/i586/Implies: New file. + + New highly optimized string functions for i[345]86. + * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. + * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. + * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. + * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. + * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. + * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. + * sysdeps/i386/i586/strlen.S: New file. + * sysdeps/i386/memchr.c: Removed. There is now an assembler version. + + * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did + not correspond to used values. + + * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper + around a kernel header file. + * sysdeps/unix/sysv/linux/Dist: Add it. + * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): + Likewise. + + * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of + defining ourself we use a kernel header file. + + * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system + call handler for i586. + + * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. 
+ Wed Oct 11 00:00:00 1995 Roland McGrath * sysdeps/i386/dl-machine.h (elf_machine_rel): Use +=, not =, to diff --git a/configure.in b/configure.in index 05191befa5..e7d4ecbb39 100644 --- a/configure.in +++ b/configure.in @@ -82,22 +82,18 @@ changequote(,)dnl # Expand the configuration machine name into a subdirectory by architecture # type and particular chip. case "$machine" in -i[345]86) - machine=i386/$machine ;; -sparc[6789]) - machine=sparc/$machine ;; -m68k) - machine=m68k/m68020 ;; -m680?0) - machine=m68k/$machine ;; -m88k) - machine=m88k/m88100 ;; -m88???) - machine=m88k/$machine ;; -mips64*) - machine=mips/mips64/$machine ;; -mips*) - machine=mips/$machine ;; +a29k | am29000) machine=a29k ;; +alpha*) machine=alpha/$machine ;; +hppa*) machine=hppa/$machine ;; +i[345]86) machine=i386/$machine ;; +m680?0) machine=m68k/$machine ;; +m68k) machine=m68k/m68020 ;; +m88???) machine=m88k/$machine ;; +m88k) machine=m88k/m88100 ;; +mips64*) machine=mips/mips64/$machine ;; # keep before mips*, which also matches mips64 +mips*) machine=mips/$machine ;; +sparc[6789]) machine=sparc/$machine ;; +supersparc) machine=sparc/sparc8 ;; esac # Make sco3.2v4 become sco3.2.4 and sunos4.1.1_U1 become sunos4.1.1.U1. 
diff --git a/hurd/Makefile b/hurd/Makefile index 582f37b3fb..53b73487f8 100644 --- a/hurd/Makefile +++ b/hurd/Makefile @@ -26,7 +26,7 @@ include ../Makeconfig headers = hurd.h $(interface-headers) \ $(addprefix hurd/,fd.h id.h port.h signal.h userlink.h \ - resource.h threadvar.h) + resource.h threadvar.h lookup.h) distribute := hurdstartup.h hurdfault.h intr-rpc.defs STATUS @@ -44,7 +44,7 @@ routines = hurdstartup hurdinit \ setauth \ pid2task task2pid \ getuids setuids getumask fchroot \ - hurdsock hurdauth invoke-trans \ + hurdsock hurdauth \ privports \ msgportdemux \ fopenport \ diff --git a/hurd/hurd.h b/hurd/hurd.h index acad15b8c4..17b5c45d89 100644 --- a/hurd/hurd.h +++ b/hurd/hurd.h @@ -77,11 +77,16 @@ extern struct hurd_port *_hurd_ports; extern unsigned int _hurd_nports; extern volatile mode_t _hurd_umask; -/* Shorthand macro for referencing _hurd_ports (see ). */ +/* Shorthand macro for internal library code referencing _hurd_ports (see + ). */ #define __USEPORT(which, expr) \ HURD_PORT_USE (&_hurd_ports[INIT_PORT_##which], (expr)) +/* Function version of __USEPORT: calls OPERATE with a send right. */ + +extern error_t _hurd_ports_use (int which, error_t (*operate) (mach_port_t)); + /* Base address and size of the initial stack set up by the exec server. If using cthreads, this stack is deallocated in startup. @@ -150,52 +155,6 @@ extern int setcttyid (mach_port_t); extern int __setauth (auth_t), setauth (auth_t); -/* Split FILE into a directory and a name within the directory. Look up a - port for the directory and store it in *DIR; store in *NAME a pointer - into FILE where the name within directory begins. The directory lookup - uses CRDIR for the root directory and CWDIR for the current directory. - Returns zero on success or an error code. 
*/ - -extern error_t __hurd_file_name_split (file_t crdir, file_t cwdir, - const char *file, - file_t *dir, char **name); -extern error_t hurd_file_name_split (file_t crdir, file_t cwdir, - const char *file, - file_t *dir, char **name); - -/* Open a port to FILE with the given FLAGS and MODE (see ). - The file lookup uses CRDIR for the root directory and CWDIR for the - current directory. If successful, returns zero and store the port - to FILE in *PORT; otherwise returns an error code. */ - -extern error_t __hurd_file_name_lookup (file_t crdir, file_t cwdir, - const char *file, - int flags, mode_t mode, - file_t *port); -extern error_t hurd_file_name_lookup (file_t crdir, file_t cwdir, - const char *filename, - int flags, mode_t mode, - file_t *port); - -/* Process the values returned by `dir_lookup' et al, and loop doing - `dir_lookup' calls until one returns FS_RETRY_NONE. CRDIR is the - root directory used for things like symlinks to absolute file names; the - other arguments should be those just passed to and/or returned from - `dir_lookup', `fsys_getroot', or `file_invoke_translator'. This - function consumes the reference in *RESULT even if it returns an error. */ - -extern error_t __hurd_file_name_lookup_retry (file_t crdir, - enum retry_type doretry, - char retryname[1024], - int flags, mode_t mode, - file_t *result); -extern error_t hurd_file_name_lookup_retry (file_t crdir, - enum retry_type doretry, - char retryname[1024], - int flags, mode_t mode, - file_t *result); - - /* Split FILE into a directory and a name within the directory. The directory lookup uses the current root and working directory. 
If successful, stores in *NAME a pointer into FILE where the name @@ -213,15 +172,15 @@ extern file_t file_name_split (const char *file, char **name); extern file_t __file_name_lookup (const char *file, int flags, mode_t mode); extern file_t file_name_lookup (const char *file, int flags, mode_t mode); -/* Invoke any translator set on the node FILE represents, and return in - *TRANSLATED a port to the translated node. FLAGS are as for - `dir_lookup' et al, but the returned port will not necessarily have - any more access rights than FILE does. */ +/* Open a port to FILE with the given FLAGS and MODE (see ). The + file lookup uses the current root directory, but uses STARTDIR as the + "working directory" for file relative names. Returns a port to the file + if successful; otherwise sets `errno' and returns MACH_PORT_NULL. */ -extern error_t __hurd_invoke_translator (file_t file, int flags, - file_t *translated); -extern error_t hurd_invoke_translator (file_t file, int flags, - file_t *translated); +extern file_t __file_name_lookup_under (file_t startdir, const char *file, + int flags, mode_t mode); +extern file_t file_name_lookup_under (file_t startdir, const char *file, + int flags, mode_t mode); /* Open a file descriptor on a port. FLAGS are as for `open'; flags diff --git a/hurd/hurdinit.c b/hurd/hurdinit.c index af892112b4..409d2d19a0 100644 --- a/hurd/hurdinit.c +++ b/hurd/hurdinit.c @@ -31,6 +31,12 @@ struct hurd_port *_hurd_ports; unsigned int _hurd_nports; mode_t _hurd_umask; +error_t +_hurd_ports_use (int which, error_t (*operate) (mach_port_t)) +{ + return HURD_PORT_USE (&_hurd_ports[which], (*operate) (port)); +} + void _hurd_proc_init (char **argv); DEFINE_HOOK (_hurd_subinit, (void)); diff --git a/stdio/_itoa.c b/stdio/_itoa.c index 19e732dcfe..caa8179624 100644 --- a/stdio/_itoa.c +++ b/stdio/_itoa.c @@ -1,6 +1,8 @@ /* Internal function for converting integers to ASCII. Copyright (C) 1994, 1995 Free Software Foundation, Inc. 
This file is part of the GNU C Library. +Contributed by Torbjorn Granlund +and Ulrich Drepper . The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -17,13 +19,400 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include +#include "../stdlib/gmp.h" +#include "../stdlib/gmp-impl.h" +#include "../stdlib/longlong.h" + +#include "_itoa.h" + + +/* Canonicalize environment. For some architectures not all values might + be defined in the GMP header files. */ +#ifndef UMUL_TIME +# define UMUL_TIME 1 +#endif +#ifndef UDIV_TIME +# define UDIV_TIME 1 +#endif + +/* Control memory layout. */ +#ifdef PACK +# undef PACK +# define PACK __attribute__ ((packed)) +#else +# define PACK +#endif + + +/* Declare local types. */ +struct base_table_t +{ +#if (UDIV_TIME > 2 * UMUL_TIME) + mp_limb base_multiplier; +#endif + char flag; + char post_shift; +#if BITS_PER_MP_LIMB == 32 + struct + { + char normalization_steps; + char ndigits; + mp_limb base PACK; +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb base_ninv PACK; +#endif + } big; +#endif +}; + +/* To reduce the memory needed we include some fields of the tables + only conditionally. */ +#if BITS_PER_MP_LIMB == 32 +# if UDIV_TIME > 2 * UMUL_TIME +# define SEL1(X) X, +# define SEL2(X) ,X +# else +# define SEL1(X) +# define SEL2(X) +# endif +#endif + + +/* Local variables. 
*/ +static const struct base_table_t base_table[] = +{ +#if BITS_PER_MP_LIMB == 64 + /* 2 */ {0ul, 1, 1}, + /* 3 */ {0xaaaaaaaaaaaaaaabul, 0, 1}, + /* 4 */ {0ul, 1, 2}, + /* 5 */ {0xcccccccccccccccdul, 0, 2}, + /* 6 */ {0xaaaaaaaaaaaaaaabul, 0, 2}, + /* 7 */ {0x2492492492492493ul, 1, 3}, + /* 8 */ {0ul, 1, 3}, + /* 9 */ {0xe38e38e38e38e38ful, 0, 3}, + /* 10 */ {0xcccccccccccccccdul, 0, 3}, + /* 11 */ {0x2e8ba2e8ba2e8ba3ul, 0, 1}, + /* 12 */ {0xaaaaaaaaaaaaaaabul, 0, 3}, + /* 13 */ {0x4ec4ec4ec4ec4ec5ul, 0, 2}, + /* 14 */ {0x2492492492492493ul, 1, 4}, + /* 15 */ {0x8888888888888889ul, 0, 3}, + /* 16 */ {0ul, 1, 4}, + /* 17 */ {0xf0f0f0f0f0f0f0f1ul, 0, 4}, + /* 18 */ {0xe38e38e38e38e38ful, 0, 4}, + /* 19 */ {0xd79435e50d79435ful, 0, 4}, + /* 20 */ {0xcccccccccccccccdul, 0, 4}, + /* 21 */ {0x8618618618618619ul, 1, 5}, + /* 22 */ {0x2e8ba2e8ba2e8ba3ul, 0, 2}, + /* 23 */ {0x642c8590b21642c9ul, 1, 5}, + /* 24 */ {0xaaaaaaaaaaaaaaabul, 0, 4}, + /* 25 */ {0x47ae147ae147ae15ul, 1, 5}, + /* 26 */ {0x4ec4ec4ec4ec4ec5ul, 0, 3}, + /* 27 */ {0x97b425ed097b425ful, 0, 4}, + /* 28 */ {0x2492492492492493ul, 1, 5}, + /* 29 */ {0x1a7b9611a7b9611bul, 1, 5}, + /* 30 */ {0x8888888888888889ul, 0, 4}, + /* 31 */ {0x0842108421084211ul, 1, 5}, + /* 32 */ {0ul, 1, 5}, + /* 33 */ {0x0f83e0f83e0f83e1ul, 0, 1}, + /* 34 */ {0xf0f0f0f0f0f0f0f1ul, 0, 5}, + /* 35 */ {0xea0ea0ea0ea0ea0ful, 0, 5}, + /* 36 */ {0xe38e38e38e38e38ful, 0, 5} +#endif +#if BITS_PER_MP_LIMB == 32 + /* 2 */ {SEL1(0ul) 1, 1, {0, 31, 0x80000000ul SEL2(0xfffffffful)}}, + /* 3 */ {SEL1(0xaaaaaaabul) 0, 1, {0, 20, 0xcfd41b91ul SEL2(0x3b563c24ul)}}, + /* 4 */ {SEL1(0ul) 1, 2, {1, 15, 0x40000000ul SEL2(0xfffffffful)}}, + /* 5 */ {SEL1(0xcccccccdul) 0, 2, {1, 13, 0x48c27395ul SEL2(0xc25c2684ul)}}, + /* 6 */ {SEL1(0xaaaaaaabul) 0, 2, {0, 12, 0x81bf1000ul SEL2(0xf91bd1b6ul)}}, + /* 7 */ {SEL1(0x24924925ul) 1, 3, {1, 11, 0x75db9c97ul SEL2(0x1607a2cbul)}}, + /* 8 */ {SEL1(0ul) 1, 3, {1, 10, 0x40000000ul SEL2(0xfffffffful)}}, + /* 9 */ 
{SEL1(0x38e38e39ul) 0, 1, {0, 10, 0xcfd41b91ul SEL2(0x3b563c24ul)}}, + /* 10 */ {SEL1(0xcccccccdul) 0, 3, {2, 9, 0x3b9aca00ul SEL2(0x12e0be82ul)}}, + /* 11 */ {SEL1(0xba2e8ba3ul) 0, 3, {0, 9, 0x8c8b6d2bul SEL2(0xd24cde04ul)}}, + /* 12 */ {SEL1(0xaaaaaaabul) 0, 3, {3, 8, 0x19a10000ul SEL2(0x3fa39ab5ul)}}, + /* 13 */ {SEL1(0x4ec4ec4ful) 0, 2, {2, 8, 0x309f1021ul SEL2(0x50f8ac5ful)}}, + /* 14 */ {SEL1(0x24924925ul) 1, 4, {1, 8, 0x57f6c100ul SEL2(0x74843b1eul)}}, + /* 15 */ {SEL1(0x88888889ul) 0, 3, {0, 8, 0x98c29b81ul SEL2(0xad0326c2ul)}}, + /* 16 */ {SEL1(0ul) 1, 4, {3, 7, 0x10000000ul SEL2(0xfffffffful)}}, + /* 17 */ {SEL1(0xf0f0f0f1ul) 0, 4, {3, 7, 0x18754571ul SEL2(0x4ef0b6bdul)}}, + /* 18 */ {SEL1(0x38e38e39ul) 0, 2, {2, 7, 0x247dbc80ul SEL2(0xc0fc48a1ul)}}, + /* 19 */ {SEL1(0xaf286bcbul) 1, 5, {2, 7, 0x3547667bul SEL2(0x33838942ul)}}, + /* 20 */ {SEL1(0xcccccccdul) 0, 4, {1, 7, 0x4c4b4000ul SEL2(0xad7f29abul)}}, + /* 21 */ {SEL1(0x86186187ul) 1, 5, {1, 7, 0x6b5a6e1dul SEL2(0x313c3d15ul)}}, + /* 22 */ {SEL1(0xba2e8ba3ul) 0, 4, {0, 7, 0x94ace180ul SEL2(0xb8cca9e0ul)}}, + /* 23 */ {SEL1(0xb21642c9ul) 0, 4, {0, 7, 0xcaf18367ul SEL2(0x42ed6de9ul)}}, + /* 24 */ {SEL1(0xaaaaaaabul) 0, 4, {4, 6, 0x0b640000ul SEL2(0x67980e0bul)}}, + /* 25 */ {SEL1(0x51eb851ful) 0, 3, {4, 6, 0x0e8d4a51ul SEL2(0x19799812ul)}}, + /* 26 */ {SEL1(0x4ec4ec4ful) 0, 3, {3, 6, 0x1269ae40ul SEL2(0xbce85396ul)}}, + /* 27 */ {SEL1(0x2f684bdbul) 1, 5, {3, 6, 0x17179149ul SEL2(0x62c103a9ul)}}, + /* 28 */ {SEL1(0x24924925ul) 1, 5, {3, 6, 0x1cb91000ul SEL2(0x1d353d43ul)}}, + /* 29 */ {SEL1(0x8d3dcb09ul) 0, 4, {2, 6, 0x23744899ul SEL2(0xce1deceaul)}}, + /* 30 */ {SEL1(0x88888889ul) 0, 4, {2, 6, 0x2b73a840ul SEL2(0x790fc511ul)}}, + /* 31 */ {SEL1(0x08421085ul) 1, 5, {2, 6, 0x34e63b41ul SEL2(0x35b865a0ul)}}, + /* 32 */ {SEL1(0ul) 1, 5, {1, 6, 0x40000000ul SEL2(0xfffffffful)}}, + /* 33 */ {SEL1(0x3e0f83e1ul) 0, 3, {1, 6, 0x4cfa3cc1ul SEL2(0xa9aed1b3ul)}}, + /* 34 */ {SEL1(0xf0f0f0f1ul) 0, 5, {1, 6, 
0x5c13d840ul SEL2(0x63dfc229ul)}}, + /* 35 */ {SEL1(0xd41d41d5ul) 1, 6, {1, 6, 0x6d91b519ul SEL2(0x2b0fee30ul)}}, + /* 36 */ {SEL1(0x38e38e39ul) 0, 3, {0, 6, 0x81bf1000ul SEL2(0xf91bd1b6ul)}} +#endif +}; + /* Lower-case digits. */ -const char _itoa_lower_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +static const char _itoa_lower_digits[] + = "0123456789abcdefghijklmnopqrstuvwxyz"; /* Upper-case digits. */ -const char _itoa_upper_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static const char _itoa_upper_digits[] + = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -/* Cause _itoa.h to define _itoa as a real function instead of an - `extern inline'. */ -#define _EXTERN_INLINE /* empty */ -#include "_itoa.h" +char * +_itoa (value, buflim, base, upper_case) + unsigned long long int value; + char *buflim; + unsigned int base; + int upper_case; +{ + const char *digits = upper_case ? _itoa_upper_digits : _itoa_lower_digits; + char *bp = buflim; + const struct base_table_t *brec = &base_table[base - 2]; + + switch (base) + { +#define RUN_2N(BITS) \ + do \ + { \ + /* `unsigned long long int' always has 64 bits. 
*/ \ + mp_limb work_hi = value >> (64 - BITS_PER_MP_LIMB); \ + \ + if (BITS_PER_MP_LIMB == 32) \ + if (work_hi != 0) \ + { \ + mp_limb work_lo; \ + int cnt; \ + \ + work_lo = value & 0xfffffffful; \ + for (cnt = BITS_PER_MP_LIMB / BITS; cnt > 0; --cnt) \ + { \ + *--bp = digits[work_lo & ((1ul << BITS) - 1)]; \ + work_lo >>= BITS; \ + } \ + if (BITS_PER_MP_LIMB % BITS != 0) \ + { \ + work_lo |= ((work_hi \ + & ((1 << BITS - BITS_PER_MP_LIMB % BITS) \ + - 1)) \ + << BITS_PER_MP_LIMB % BITS); \ + *--bp = digits[work_lo]; \ + work_hi >>= BITS - BITS_PER_MP_LIMB % BITS; \ + } \ + } \ + else \ + work_hi = value & 0xfffffffful; \ + do \ + { \ + *--bp = digits[work_hi & ((1 << BITS) - 1)]; \ + work_hi >>= BITS; \ + } \ + while (work_hi != 0); \ + } \ + while (0) + case 8: + RUN_2N (3); + break; + + case 16: + RUN_2N (4); + break; + + default: + { +#if BITS_PER_MP_LIMB == 64 + mp_limb base_multiplier = brec->base_multiplier; + if (brec->flag) + while (value != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, value, base_multiplier); + quo = (x + ((value - x) >> 1)) >> (brec->post_shift - 1); + rem = value - quo * base; + *--bp = digits[rem]; + value = quo; + } + else + while (value != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, value, base_multiplier); + quo = x >> brec->post_shift; + rem = value - quo * base; + *--bp = digits[rem]; + value = quo; + } +#endif +#if BITS_PER_MP_LIMB == 32 + mp_limb t[3]; + int n; + + /* First convert x0 to 1-3 words in base s->big.base. + Optimize for frequent cases of 32 bit numbers. */ + if ((mp_limb) (value >> 32) >= 1) + { + int big_normalization_steps = brec->big.normalization_steps; + mp_limb big_base_norm = brec->big.base << big_normalization_steps; + + if ((mp_limb) (value >> 32) >= brec->big.base) + { + mp_limb x1hi, x1lo, r; + /* If you want to optimize this, take advantage of + that the quotient in the first udiv_qrnnd will + always be very small. It might be faster just to + subtract in a tight loop. 
*/ + +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb x, xh, xl; + + if (big_normalization_steps == 0) + xh = 0; + else + xh = (mp_limb) (value >> 64 - big_normalization_steps); + xl = (mp_limb) (value >> 32 - big_normalization_steps); + udiv_qrnnd_preinv (x1hi, r, xh, xl, big_base_norm, + brec->big.base_ninv); + /* Each later step needs the same inverse as DI, not the shift count. */ + xl = ((mp_limb) value) << big_normalization_steps; + udiv_qrnnd_preinv (x1lo, x, r, xl, big_base_norm, + brec->big.base_ninv); + t[2] = x >> big_normalization_steps; + + if (big_normalization_steps == 0) + xh = x1hi; + else + xh = ((x1hi << big_normalization_steps) + | (x1lo >> 32 - big_normalization_steps)); + xl = x1lo << big_normalization_steps; + udiv_qrnnd_preinv (t[0], x, xh, xl, big_base_norm, + brec->big.base_ninv); + t[1] = x >> big_normalization_steps; +#elif UDIV_NEEDS_NORMALIZATION + mp_limb x, xh, xl; + + if (big_normalization_steps == 0) + xh = 0; + else + xh = (mp_limb) (value >> 64 - big_normalization_steps); + xl = (mp_limb) (value >> 32 - big_normalization_steps); + udiv_qrnnd (x1hi, r, xh, xl, big_base_norm); + + xl = ((mp_limb) value) << big_normalization_steps; + udiv_qrnnd (x1lo, x, r, xl, big_base_norm); + t[2] = x >> big_normalization_steps; + + if (big_normalization_steps == 0) + xh = x1hi; + else + xh = ((x1hi << big_normalization_steps) + | (x1lo >> 32 - big_normalization_steps)); + xl = x1lo << big_normalization_steps; + udiv_qrnnd (t[0], x, xh, xl, big_base_norm); + t[1] = x >> big_normalization_steps; +#else + udiv_qrnnd (x1hi, r, 0, (mp_limb) (value >> 32), + brec->big.base); + udiv_qrnnd (x1lo, t[2], r, (mp_limb) value, brec->big.base); + udiv_qrnnd (t[0], t[1], x1hi, x1lo, brec->big.base); +#endif + n = 3; + } + else + { +#if (UDIV_TIME > 2 * UMUL_TIME) + mp_limb x; + + value <<= brec->big.normalization_steps; + udiv_qrnnd_preinv (t[0], x, (mp_limb) (value >> 32), + (mp_limb) value, big_base_norm, + brec->big.base_ninv); + t[1] = x >> brec->big.normalization_steps; +#elif UDIV_NEEDS_NORMALIZATION + mp_limb x; + + value 
<<= big_normalization_steps; + udiv_qrnnd (t[0], x, (mp_limb) (value >> 32), + (mp_limb) value, big_base_norm); + t[1] = x >> big_normalization_steps; +#else + udiv_qrnnd (t[0], t[1], (mp_limb) (value >> 32), + (mp_limb) value, brec->big.base); +#endif + n = 2; + } + } + else + { + t[0] = value; + n = 1; + } + + /* Convert the 1-3 words in t[], word by word, to ASCII. */ + do + { + mp_limb ti = t[--n]; + int ndig_for_this_limb = 0; + +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb base_multiplier = brec->base_multiplier; + if (brec->flag) + while (ti != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, ti, base_multiplier); + quo = (x + ((ti - x) >> 1)) >> (brec->post_shift - 1); + rem = ti - quo * base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } + else + while (ti != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, ti, base_multiplier); + quo = x >> brec->post_shift; + rem = ti - quo * base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } +#else + while (ti != 0) + { + mp_limb quo, rem; + + quo = ti / base; + rem = ti % base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } +#endif + /* If this wasn't the most significant word, pad with zeros. */ + if (n != 0) + while (ndig_for_this_limb < brec->big.ndigits) + { + *--bp = '0'; + ++ndig_for_this_limb; + } + } + while (n != 0); +#endif + } + break; + } + + return bp; +} diff --git a/stdio/_itoa.h b/stdio/_itoa.h index 81240507b1..ab3d1d1d3a 100644 --- a/stdio/_itoa.h +++ b/stdio/_itoa.h @@ -21,8 +21,6 @@ Cambridge, MA 02139, USA. */ #define _ITOA_H #include -extern const char _itoa_lower_digits[], _itoa_upper_digits[]; - /* Convert VALUE into ASCII in base BASE (2..36). Write backwards starting the character just before BUFLIM. Return the address of the first (left-to-right) character in the number. 
@@ -31,28 +29,4 @@ extern const char _itoa_lower_digits[], _itoa_upper_digits[]; extern char *_itoa __P ((unsigned long long int value, char *buflim, unsigned int base, int upper_case)); -#ifndef _EXTERN_INLINE -#define _EXTERN_INLINE extern __inline -#endif - -_EXTERN_INLINE -char * -_itoa (unsigned long long int value, char *buflim, - unsigned int base, int upper_case) -{ - /* Base-36 digits for numbers. */ - const char *digits = upper_case ? _itoa_upper_digits : _itoa_lower_digits; - - register char *bp = buflim; - - while (value > 0) - { - *--bp = digits[value % base]; - value /= base; - } - - return bp; -} - - #endif /* itoa.h */ diff --git a/stdio/test_rdwr.c b/stdio/test_rdwr.c index 8e0c1dfade..f987f16cd4 100644 --- a/stdio/test_rdwr.c +++ b/stdio/test_rdwr.c @@ -17,6 +17,7 @@ not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include +#include #include #include #include diff --git a/stdlib/gmp-impl.h b/stdlib/gmp-impl.h index ccffe7bb1e..48d3af9761 100644 --- a/stdlib/gmp-impl.h +++ b/stdlib/gmp-impl.h @@ -19,11 +19,17 @@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #if ! defined (alloca) -#if defined (__GNUC__) || defined (__sparc__) || defined (sparc) +#if defined (__GNUC__) #define alloca __builtin_alloca #endif #endif +#if ! defined (alloca) +#if defined (__sparc__) || defined (sparc) || defined (__sgi) +#include <alloca.h> +#endif +#endif + #ifndef NULL #define NULL 0L #endif @@ -168,6 +174,7 @@ void _mp_default_free (); else \ ____mpn_sqr_n (prodp, up, size, tspace); \ } while (0); +#define assert(trueval) do {if (!(trueval)) abort ();} while (0) /* Structure for conversion between internal binary format and strings in base 2..36.
*/ @@ -197,9 +204,11 @@ struct bases extern const struct bases __mp_bases[]; extern mp_size_t __gmp_default_fp_limb_precision; -/* Divide the two-limb number in (NH,,NL) by D, with DI being a 32 bit - approximation to (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). - Put the quotient in Q and the remainder in R. */ +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + If this would yield overflow, DI should be the largest possible number + (i.e., only ones). For correct operation, the most significant bit of D + has to be set. Put the quotient in Q and the remainder in R. */ #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ do { \ mp_limb _q, _ql, _r; \ @@ -226,6 +235,8 @@ extern mp_size_t __gmp_default_fp_limb_precision; (r) = _r; \ (q) = _q; \ } while (0) +/* Like udiv_qrnnd_preinv, but for any value D. DNORM is D shifted left + so that its most significant bit is set. LGUP is ceil(log2(D)). */ #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \ do { \ mp_limb n2, n10, n1, nadj, q1; \ @@ -243,6 +254,8 @@ extern mp_size_t __gmp_default_fp_limb_precision; (r) = _xl + ((d) & _xh); \ (q) = _xh - q1; \ } while (0) +/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which + version to use. */ #define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \ do { \ mp_limb n2, n10, n1, nadj, q1; \ @@ -262,22 +275,49 @@ extern mp_size_t __gmp_default_fp_limb_precision; } while (0) #if defined (__GNUC__) -/* Define stuff for longlong.h asm macros. */ -#if __GNUC_NEW_ATTR_MODE_SYNTAX -typedef unsigned int UQItype __attribute__ ((mode ("QI"))); -typedef int SItype __attribute__ ((mode ("SI"))); -typedef unsigned int USItype __attribute__ ((mode ("SI"))); -typedef int DItype __attribute__ ((mode ("DI"))); -typedef unsigned int UDItype __attribute__ ((mode ("DI"))); -#else +/* Define stuff for longlong.h.
*/ typedef unsigned int UQItype __attribute__ ((mode (QI))); typedef int SItype __attribute__ ((mode (SI))); typedef unsigned int USItype __attribute__ ((mode (SI))); typedef int DItype __attribute__ ((mode (DI))); typedef unsigned int UDItype __attribute__ ((mode (DI))); -#endif +#else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long USItype; #endif typedef mp_limb UWtype; typedef unsigned int UHWtype; #define W_TYPE_SIZE BITS_PER_MP_LIMB + + +#ifndef IEEE_DOUBLE_BIG_ENDIAN +#define IEEE_DOUBLE_BIG_ENDIAN 1 +#endif + +#if IEEE_DOUBLE_BIG_ENDIAN +union ieee_double_extract +{ + struct + { + unsigned long sig:1; + unsigned long exp:11; + unsigned long manh:20; + unsigned long manl:32; + } s; + double d; +}; +#else +union ieee_double_extract +{ + struct + { + unsigned long manl:32; + unsigned long manh:20; + unsigned long exp:11; + unsigned long sig:1; + } s; + double d; +}; +#endif diff --git a/stdlib/gmp.h b/stdlib/gmp.h index 95c2f1beba..0b2cb29014 100644 --- a/stdlib/gmp.h +++ b/stdlib/gmp.h @@ -24,13 +24,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define __need_size_t #include <stddef.h> -#ifdef __STDC__ +#if defined (__STDC__) #define __gmp_const const #else #define __gmp_const #endif -#ifdef __GNUC__ +#if defined (__GNUC__) #define __gmp_inline inline #else #define __gmp_inline @@ -40,9 +40,14 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ typedef unsigned int mp_limb; typedef int mp_limb_signed; #else +#if _LONG_LONG_LIMB +typedef unsigned long long int mp_limb; +typedef long long int mp_limb_signed; +#else typedef unsigned long int mp_limb; typedef long int mp_limb_signed; #endif +#endif typedef mp_limb * mp_ptr; typedef __gmp_const mp_limb * mp_srcptr; @@ -52,9 +57,9 @@ typedef long int mp_exp_t; #ifndef __MP_SMALL__ typedef struct { - long int alloc; /* Number of *limbs* allocated and pointed + mp_size_t alloc; /* Number of *limbs* allocated and pointed to by the D field. */ - long int size; /* abs(SIZE) is the number of limbs + mp_size_t size; /* abs(SIZE) is the number of limbs the last field points to. If SIZE is negative this is a negative number. */ @@ -130,12 +135,16 @@ typedef __mpf_struct *mpf_ptr; typedef __gmp_const __mpq_struct *mpq_srcptr; typedef __mpq_struct *mpq_ptr; -#ifdef __STDC__ +#if defined (__STDC__) #define _PROTO(x) x #else #define _PROTO(x) () #endif +#if defined (FILE) || defined (_STDIO_H_) || defined (__STDIO_H__) || defined (H_STDIO) +#define _GMP_H_HAVE_FILE 1 +#endif + void mp_set_memory_functions _PROTO((void *(*) (size_t), void *(*) (void *, size_t, size_t), void (*) (void *, size_t))); @@ -165,7 +174,7 @@ unsigned long int mpz_get_ui _PROTO ((mpz_srcptr)); mp_limb mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t)); mp_size_t mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr)); void mpz_init _PROTO ((mpz_ptr)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpz_inp_raw _PROTO ((mpz_ptr, FILE *)); int mpz_inp_str _PROTO ((mpz_ptr, FILE *, int)); #endif @@ -180,7 +189,7 @@ void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpz_out_raw _PROTO ((FILE *, mpz_srcptr)); void mpz_out_str _PROTO ((FILE *, int, mpz_srcptr)); #endif @@ -218,6 +227,8 @@ void 
mpz_tdiv_qr_ui _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int)); void mpz_tdiv_r _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr)); void mpz_tdiv_r_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int)); +void mpz_array_init (mpz_ptr, size_t, mp_size_t); + /**************** Rational (i.e. Q) routines. ****************/ void mpq_init _PROTO ((mpq_ptr)); @@ -253,7 +264,7 @@ void mpf_dump _PROTO ((mpf_srcptr)); char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr)); void mpf_init _PROTO ((mpf_ptr)); void mpf_init2 _PROTO ((mpf_ptr, mp_size_t)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpf_inp_str _PROTO ((mpf_ptr, FILE *, int)); #endif void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr)); @@ -265,7 +276,7 @@ void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr)); void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr)); -#ifdef FILE +#ifdef _GMP_H_HAVE_FILE void mpf_out_str _PROTO ((mpf_ptr, int, size_t, FILE *)); #endif void mpf_set _PROTO ((mpf_ptr, mpf_srcptr)); @@ -335,7 +346,7 @@ mp_limb __mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb)); static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_add_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -377,7 +388,7 @@ __mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb) } static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_add (register mp_ptr res_ptr, register mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -406,7 +417,7 @@ __mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size) } static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_sub_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -448,7 +459,7 @@ __mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb) } static __gmp_inline mp_limb -#if __STDC__ +#if defined (__STDC__) __mpn_sub (register mp_ptr res_ptr, register 
mp_srcptr s1_ptr, register mp_size_t s1_size, @@ -477,7 +488,7 @@ __mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size) } static __gmp_inline mp_size_t -#if __STDC__ +#if defined (__STDC__) __mpn_normal_size (mp_srcptr ptr, mp_size_t size) #else __mpn_normal_size (ptr, size) @@ -512,7 +523,6 @@ __mpn_normal_size (ptr, size) /* Useful synonyms, but not quite compatible with GMP 1. */ #define mpz_div mpz_fdiv_q #define mpz_divmod mpz_fdiv_qr -#define mpz_mod mpz_fdiv_r #define mpz_div_ui mpz_fdiv_q_ui #define mpz_divmod_ui mpz_fdiv_qr_ui #define mpz_mod_ui mpz_fdiv_r_ui diff --git a/stdlib/longlong.h b/stdlib/longlong.h index 97c469d8c0..bbb92e3af8 100644 --- a/stdlib/longlong.h +++ b/stdlib/longlong.h @@ -97,7 +97,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define __AND_CLOBBER_CC , "cc" #endif /* __GNUC__ < 2 */ -#if (defined (__a29k__) || defined (___AM29K__)) && W_TYPE_SIZE == 32 +#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add %1,%4,%5 addc %0,%2,%3" \ @@ -152,6 +152,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 46 +#ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UDItype __r; \ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ @@ -159,12 +160,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ } while (0) extern UDItype __udiv_qrnnd (); #define UDIV_TIME 220 -#endif +#endif /* LONGLONG_STANDALONE */ +#endif /* __alpha__ */ #if defined (__arm__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ - __asm__ ("adds %1,%4,%5 - adc %0,%2,%3" \ + __asm__ ("adds %1, %4, %5 + adc %0, %2, %3" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "%r" ((USItype)(ah)), \ @@ -172,8 +174,8 @@ extern UDItype __udiv_qrnnd (); "%r" ((USItype)(al)), \ "rI" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ - __asm__ ("subs %1,%4,%5 - sbc %0,%2,%3" \ + __asm__ ("subs %1, %4, %5 + sbc %0, %2, %3" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "r" ((USItype)(ah)), \ @@ -181,19 +183,19 @@ extern UDItype __udiv_qrnnd (); "r" ((USItype)(al)), \ "rI" ((USItype)(bl))) #define umul_ppmm(xh, xl, a, b) \ - __asm__ ("; Inlined umul_ppmm - mov r0,%2 lsr 16 - mov r2,%3 lsr 16 - bic r1,%2,r0 lsl 16 - bic r2,%3,r2 lsl 16 - mul %1,r1,r2 - mul r2,r0,r2 - mul r1,%0,r1 - mul %0,r0,%0 - adds r1,r2,r1 - addcs %0,%0,0x10000 - adds %1,%1,r1 lsl 16 - adc %0,%0,r1 lsr 16" \ + __asm__ ("%@ Inlined umul_ppmm + mov %|r0, %2, lsr #16 + mov %|r2, %3, lsr #16 + bic %|r1, %2, %|r0, lsl #16 + bic %|r2, %3, %|r2, lsl #16 + mul %1, %|r1, %|r2 + mul %|r2, %|r0, %|r2 + mul %|r1, %0, %|r1 + mul %0, %|r0, %0 + adds %|r1, %|r2, %|r1 + addcs %0, %0, #65536 + adds %1, %1, %|r1, lsl #16 + adc %0, %0, %|r1, lsr #16" \ : "=&r" ((USItype)(xh)), \ "=r" ((USItype)(xl)) \ : "r" ((USItype)(a)), \ @@ -296,9 +298,9 @@ extern UDItype __udiv_qrnnd (); struct {USItype __h, __l;} __i; \ } __xx; \ __asm__ ("xmpyu %1,%2,%0" \ - : "=x" (__xx.__ll) \ - : "x" ((USItype)(u)), \ - "x" ((USItype)(v))); \ + : "=fx" (__xx.__ll) \ + : "fx" ((USItype)(u)), \ + "fx" ((USItype)(v))); \ (wh) = __xx.__i.__h; \ (wl) = __xx.__i.__l; \ } while (0) @@ -308,12 +310,14 @@ extern UDItype __udiv_qrnnd (); #define UMUL_TIME 40 #define UDIV_TIME 80 #endif +#ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, 
d) \ do { USItype __r; \ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) extern USItype __udiv_qrnnd (); +#endif /* LONGLONG_STANDALONE */ #define count_leading_zeros(count, x) \ do { \ USItype __tmp; \ @@ -419,8 +423,12 @@ extern USItype __udiv_qrnnd (); } while (0) #define count_trailing_zeros(count, x) \ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) +#ifndef UMUL_TIME #define UMUL_TIME 40 +#endif +#ifndef UDIV_TIME #define UDIV_TIME 40 +#endif #endif /* 80x86 */ #if defined (__i960__) && W_TYPE_SIZE == 32 @@ -442,7 +450,7 @@ extern USItype __udiv_qrnnd (); __w; }) #endif /* __i960__ */ -#if defined (__mc68000__) && W_TYPE_SIZE == 32 +#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add%.l %5,%1 addx%.l %3,%0" \ @@ -489,38 +497,34 @@ extern USItype __udiv_qrnnd (); : "=d" ((USItype)(count)) \ : "od" ((USItype)(x)), "n" (0)) #else /* not mc68020 */ -#define umul_ppmm(xh, xl, a, b) \ - __asm__ ("| Inlined umul_ppmm - move%.l %2,%/d0 - move%.l %3,%/d1 - move%.l %/d0,%/d2 - swap %/d0 - move%.l %/d1,%/d3 - swap %/d1 - move%.w %/d2,%/d4 - mulu %/d3,%/d4 - mulu %/d1,%/d2 - mulu %/d0,%/d3 - mulu %/d0,%/d1 - move%.l %/d4,%/d0 - eor%.w %/d0,%/d0 - swap %/d0 - add%.l %/d0,%/d2 - add%.l %/d3,%/d2 +#define umul_ppmmxx(xh, xl, a, b) \ + do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm + move%.l %5,%3 + move%.l %2,%0 + move%.w %3,%1 + swap %3 + swap %0 + mulu %2,%1 + mulu %3,%0 + mulu %2,%3 + swap %2 + mulu %5,%2 + add%.l %3,%2 jcc 1f - add%.l #65536,%/d1 -1: swap %/d2 - moveq #0,%/d0 - move%.w %/d2,%/d0 - move%.w %/d4,%/d2 - move%.l %/d2,%1 - add%.l %/d1,%/d0 - move%.l %/d0,%0" \ - : "=g" ((USItype)(xh)), \ - "=g" ((USItype)(xl)) \ - : "g" ((USItype)(a)), \ - "g" ((USItype)(b)) \ - : "d0", "d1", "d2", "d3", "d4") + add%.l %#0x10000,%0 +1: move%.l %2,%3 + clr%.w %2 + swap %2 + swap %3 + clr%.w %3 + 
add%.l %3,%1 + addx%.l %2,%0 + | End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ + } while (0) #define UMUL_TIME 100 #define UDIV_TIME 400 #endif /* not mc68020 */ @@ -553,7 +557,7 @@ extern USItype __udiv_qrnnd (); : "r" ((USItype)(x))); \ (count) = __cbtmp ^ 31; \ } while (0) -#if defined (__mc88110__) +#if defined (__m88110__) #define umul_ppmm(wh, wl, u, v) \ do { \ union {UDItype __ll; \ @@ -582,10 +586,18 @@ extern USItype __udiv_qrnnd (); #else #define UMUL_TIME 17 #define UDIV_TIME 150 -#endif /* __mc88110__ */ +#endif /* __m88110__ */ #endif /* __m88000__ */ #if defined (__mips__) && W_TYPE_SIZE == 32 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" \ + : "=l" ((USItype)(w0)), \ + "=h" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#else #define umul_ppmm(w1, w0, u, v) \ __asm__ ("multu %2,%3 mflo %0 @@ -594,11 +606,20 @@ extern USItype __udiv_qrnnd (); "=d" ((USItype)(w1)) \ : "d" ((USItype)(u)), \ "d" ((USItype)(v))) +#endif #define UMUL_TIME 10 #define UDIV_TIME 100 #endif /* __mips__ */ #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" \ + : "=l" ((UDItype)(w0)), \ + "=h" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#else #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmultu %2,%3 mflo %0 @@ -607,8 +628,9 @@ extern USItype __udiv_qrnnd (); "=d" ((UDItype)(w1)) \ : "d" ((UDItype)(u)), \ "d" ((UDItype)(v))) -#define UMUL_TIME 10 -#define UDIV_TIME 100 +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 #endif /* __mips__ */ #if defined (__ns32000__) && W_TYPE_SIZE == 32 @@ -647,7 +669,7 @@ extern USItype __udiv_qrnnd (); } while (0) #endif /* __ns32000__ */ -#if (defined (__powerpc__) || defined (___IBMR2__)) && W_TYPE_SIZE == 32 +#if 
(defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (bh) && (bh) == 0) \ @@ -676,14 +698,14 @@ extern USItype __udiv_qrnnd (); #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (ah) && (ah) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "r" ((USItype)(bh)), \ "rI" ((USItype)(al)), \ "r" ((USItype)(bl))); \ else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ : "=r" ((USItype)(sh)), \ "=&r" ((USItype)(sl)) \ : "r" ((USItype)(bh)), \ @@ -716,7 +738,7 @@ extern USItype __udiv_qrnnd (); __asm__ ("{cntlz|cntlzw} %0,%1" \ : "=r" ((USItype)(count)) \ : "r" ((USItype)(x))) -#if defined (__powerpc__) +#if defined (_ARCH_PPC) #define umul_ppmm(ph, pl, m0, m1) \ do { \ USItype __m0 = (m0), __m1 = (m1); \ @@ -785,16 +807,15 @@ extern USItype __udiv_qrnnd (); "g" ((USItype)(bh)), \ "1" ((USItype)(al)), \ "g" ((USItype)(bl))) -/* This insn doesn't work on ancient pyramids. */ +/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. 
*/ #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __xx; \ - __xx.__i.__l = u; \ - __asm__ ("uemul %3,%0" \ - : "=r" (__xx.__i.__h), \ - "=r" (__xx.__i.__l) \ - : "1" (__xx.__i.__l), \ + __asm__ ("movw %1,%R0 + uemul %2,%0" \ + : "=&r" (__xx.__ll) \ + : "g" ((USItype) (u)), \ "g" ((USItype)(v))); \ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) #endif /* __pyr__ */ @@ -868,6 +889,20 @@ extern USItype __udiv_qrnnd (); } while (0) #endif +#if defined (__sh2__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3 + sts macl,%1 + sts mach,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +#define UMUL_TIME 5 +#endif + #if defined (__sparc__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addcc %r4,%5,%1 @@ -901,17 +936,21 @@ extern USItype __udiv_qrnnd (); : "r" ((USItype)(u)), \ "r" ((USItype)(v))) #define UMUL_TIME 5 -/* We might want to leave this undefined for `SuperSPARC (tm)' since - its implementation is crippled and often traps. */ +#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ #define udiv_qrnnd(q, r, n1, n0, d) \ - __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ - : "=&r" ((USItype)(q)), \ - "=&r" ((USItype)(r)) \ - : "r" ((USItype)(n1)), \ - "r" ((USItype)(n0)), \ - "r" ((USItype)(d))) + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" ((USItype)(__q)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) #define UDIV_TIME 25 -#else +#endif /* SUPERSPARC */ +#else /* ! __sparc_v8__ */ #if defined (__sparclite__) /* This has hardware multiply but not divide. It also has two additional instructions scan (ffs from high bit) and divscc. 
*/ @@ -973,9 +1012,10 @@ extern USItype __udiv_qrnnd (); __asm__ ("scan %1,0,%0" \ : "=r" ((USItype)(x)) \ : "r" ((USItype)(count))) -#else -/* SPARC without integer multiplication and divide instructions. - (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */ +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ +/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +#ifndef umul_ppmm #define umul_ppmm(w1, w0, u, v) \ __asm__ ("! Inlined umul_ppmm wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr @@ -1023,6 +1063,9 @@ extern USItype __udiv_qrnnd (); "r" ((USItype)(v)) \ : "%g1", "%g2" __AND_CLOBBER_CC) #define UMUL_TIME 39 /* 39 instructions */ +#endif +#ifndef udiv_qrnnd +#ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { USItype __r; \ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ @@ -1030,8 +1073,8 @@ extern USItype __udiv_qrnnd (); } while (0) extern USItype __udiv_qrnnd (); #define UDIV_TIME 140 -#endif /* __sparclite__ */ -#endif /* __sparc_v8__ */ +#endif /* LONGLONG_STANDALONE */ +#endif /* udiv_qrnnd */ #endif /* __sparc__ */ #if defined (__vax__) && W_TYPE_SIZE == 32 @@ -1075,7 +1118,7 @@ extern USItype __udiv_qrnnd (); __xx.__i.__h = n1; __xx.__i.__l = n0; \ __asm__ ("ediv %3,%2,%0,%1" \ : "=g" (q), "=g" (r) \ - : "g" (__n1n0.ll), "g" (d)); \ + : "g" (__xx.ll), "g" (d)); \ } while (0) #endif /* __vax__ */ @@ -1173,11 +1216,12 @@ extern USItype __udiv_qrnnd (); do { \ UWtype __x0, __x1, __x2, __x3; \ UHWtype __ul, __vl, __uh, __vh; \ + UWtype __u = (u), __v = (v); \ \ - __ul = __ll_lowpart (u); \ - __uh = __ll_highpart (u); \ - __vl = __ll_lowpart (v); \ - __vh = __ll_highpart (v); \ + __ul = __ll_lowpart (__u); \ + __uh = __ll_highpart (__u); \ + __vl = __ll_lowpart (__v); \ + __vh = __ll_highpart (__v); \ \ __x0 = (UWtype) __ul * __vl; \ __x1 = (UWtype) __ul * __vh; \ @@ -1194,6 +1238,17 @@ extern USItype __udiv_qrnnd (); } while (0) #endif +#if !defined (umul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ 
+ do { \ + UWtype __w1; \ + UWtype __m0 = (u), __m1 = (v); \ + umul_ppmm (__w1, w0, __m0, __m1); \ + (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ + - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ + } while (0) +#endif + /* Define this unconditionally, so it can be used for debugging. */ #define __udiv_qrnnd_c(q, r, n1, n0, d) \ do { \ diff --git a/sysdeps/alpha/add_n.s b/sysdeps/alpha/add_n.s new file mode 100644 index 0000000000..e1ad4600f5 --- /dev/null +++ b/sysdeps/alpha/add_n.s @@ -0,0 +1,119 @@ + # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and + # store sum in a third limb vector. + + # Copyright (C) 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + + # INPUT PARAMETERS + # res_ptr $16 + # s1_ptr $17 + # s2_ptr $18 + # size $19 + + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_add_n + .ent __mpn_add_n +__mpn_add_n: + .frame $30,0,$26,0 + + ldq $3,0($17) + ldq $4,0($18) + + subq $19,1,$19 + and $19,4-1,$2 # number of limbs in first loop + bis $31,$31,$0 + beq $2,.L0 # if multiple of 4 limbs, skip first loop + + subq $19,$2,$19 + +.Loop0: subq $2,1,$2 + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + addq $17,8,$17 + addq $18,8,$18 + bis $5,$5,$3 + bis $6,$6,$4 + addq $16,8,$16 + bne $2,.Loop0 + +.L0: beq $19,.Lend + + .align 3 +.Loop: subq $19,4,$19 + + ldq $5,8($17) + addq $4,$0,$4 + ldq $6,8($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + + ldq $3,16($17) + addq $6,$0,$6 + ldq $4,16($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,8($16) + or $0,$1,$0 + + ldq $5,24($17) + addq $4,$0,$4 + ldq $6,24($18) + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,16($16) + or $0,$1,$0 + + ldq $3,32($17) + addq $6,$0,$6 + ldq $4,32($18) + cmpult $6,$0,$1 + addq $5,$6,$6 + cmpult $6,$5,$0 + stq $6,24($16) + or $0,$1,$0 + + addq $17,32,$17 + addq $18,32,$18 + addq $16,32,$16 + bne $19,.Loop + +.Lend: addq $4,$0,$4 + cmpult $4,$0,$1 + addq $3,$4,$4 + cmpult $4,$3,$0 + stq $4,0($16) + or $0,$1,$0 + ret $31,($26),1 + + .end __mpn_add_n diff --git a/sysdeps/alpha/addmul_1.s b/sysdeps/alpha/addmul_1.s new file mode 100644 index 0000000000..46d277df6e --- /dev/null +++ b/sysdeps/alpha/addmul_1.s @@ -0,0 +1,100 @@ + # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + # the result to a second limb vector. + + # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + + # This file is part of the GNU MP Library. 
+ + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU Library General Public License as published by + # the Free Software Foundation; either version 2 of the License, or (at your + # option) any later version. + + # The GNU MP Library is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # You should have received a copy of the GNU Library General Public License + # along with the GNU MP Library; see the file COPYING.LIB. If not, write to + # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + + # INPUT PARAMETERS + # res_ptr r16 + # s1_ptr r17 + # size r18 + # s2_limb r19 + + # This code runs at 42 cycles/limb on the 21064. + + # To improve performance for long multiplications, we would use + # 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use + # these instructions without slowing down the general code: 1. We can + # only have two prefetches in operation at any time in the Alpha + # architecture. 2. There will seldom be any special alignment + # between RES_PTR and S1_PTR. Maybe we can simply divide the current + # loop into an inner and outer loop, having the inner loop handle + # exactly one prefetch block? 
+ + .set noreorder + .set noat +.text + .align 3 + .globl __mpn_addmul_1 + .ent __mpn_addmul_1 2 +__mpn_addmul_1: + .frame $30,0,$26 + + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + mulq $2,$19,$3 # $3 = prod_low + ldq $5,0($16) # $5 = *res_ptr + umulh $2,$19,$0 # $0 = prod_high + beq $18,Lend1 # jump if size was == 1 + ldq $2,0($17) # $2 = s1_limb + addq $17,8,$17 # s1_ptr++ + subq $18,1,$18 # size-- + addq $5,$3,$3 + cmpult $3,$5,$4 + stq $3,0($16) + addq $16,8,$16 # res_ptr++ + beq $18,Lend2 # jump if size was == 2 + + .align 3 +Loop: mulq $2,$19,$3 # $3 = prod_low +