PATCH: intmaths for data type `long long' under Solaris2 / Sparc32
Gert Ohme
ohme@dialeasy.de
Sat Oct 6 08:36:00 GMT 2001
Hi Ed.
with the appended patch you get some code for the missing symbols __rem64,
__div64, __mul64, __urem64 and __udiv64.
Under Solaris, gcc emits references to these symbols in the object files it
generates (see gcc-2.95.x/gcc/config/sparc/sol2.h and gcc-2.95.x/gcc/config/sparc/elf.h).
The routines handle basic integer arithmetic (i.e. multiplication, division and
remainder) for the data type `(un)signed long long'.
Some words to the appended files:
I have highly optimized mul64.S for Sparc32-based machines; it should
be rewritten for Sparc64.
The C files contain the fastest division and remainder algorithms that I know
about. They still lack checks such as division by zero, and I measured
them to run 2-4 times slower than the highly optimized, OS- and
machine-dependent library routines that are installed.
The appended code however can serve as a base for further optimisation.
For example
gcc -O3 -S rem64.c
offers assembler code with which you can start further work.
You can of course compare this assembler code with its equivalents in the
Solaris2 based libc. Under Solaris2.6 do:
ar -x /usr/lib/libc.a divrem64.o
/usr/ccs/bin/dis divrem64.o > divrem64.ss
Again, take into account that the installed libc depends on the Solaris version
as well as the machine type (i.e. available instruction set) of the system
that you are working on.
greetings
Gert
-----------------------------------------------------------------------------
2001-10-06 Gert Ohme <ohme@dialeasy.de>
* intmaths for data type `long long' under Solaris2 for Sparc32:
*
* sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/mul64.S: New.
* sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/div64.c: New.
* sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/rem64.c: New.
* sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/udiv64.c: New.
* sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/urem64.c: New.
============================================================
Index: sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/mul64.S
--- sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/mul64.S created
+++ sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/mul64.S Sat Oct 6 12:36:24 2001 1.1
@@ -0,0 +1,233 @@
+/* __mul64:
+ SPARC32-routine for multiplication of `unsigned long long'
+
+ Contributed by Gert Ohme (ohme@dialeasy.de)
+
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA.
+
+Description:
+ The input in terms of unsigned long long's
+ +-------------------+ +-------------------+
+ | a | * | b |
+ +-------------------+ +-------------------+
+ means in terms of unsigned longs
+ +---------+---------+ +---------+---------+
+ | a0 | a1 | * | b0 | b1 |
+ +---------+---------+ +---------+---------+
+ which translates into registers at call time
+ +---------+---------+ +---------+---------+
+ | %o0 | %o1 | * | %o2 | %o3 |
+ +---------+---------+ +---------+---------+
+ -----------------------------------------------------
+ Elementary algebraics lead to:
+ +-------------------+
+ | a1 * b1 |
+ +-------------------+
+ +-----------------------+
++ | a0 * b1 |
+ +-----------------------+
+ +-----------------------+
++ | a1 * b0 |
+ +-----------------------+
+ +-------------------+
++ | a0 * b0 |
+ +-------------------+
+ -----------------------------------------------------
+ or in terms of registers (where we omit the unused
+ high value 64 bits):
+ +-------------------+
+ Step 1: | %o1 * %o3 |
+ +-------------------+
+ +-----------------------+
+ Step 2: | %o0 * %o3 |
+ +-----------------------+
+ +-----------------------+
+ Step 3: | %o1 * %o2 |
+ +-----------------------+
+
+ Step 4: sum up
+ -----------------------------------------------------
+ With the usage of the %y-register the output of the partial
+ multiplications looks like (where we again omit all high value
+ bits 64 and above):
+ +---------+---------+
+ Step 1: | %o4 | %y |
+ +---------+---------+
+ +---------+---------+
+ Step 2: | | %y |
+ +---------+---------+
+ +---------+---------+
+ Step 3: | | %y |
+ +---------+---------+
+
+ Step 4: sum up
+-----------------------------------------------------
+ The result
+ +-------------------+
+ | result |
+ +-------------------+
+ is placed into the registers %o0 and %o1 for the calling program:
+ +---------+---------+
+ | %o0 | %o1 |
+ +---------+---------+
+*/
+
+.section ".text"
+	.align 4;
+	.global __mul64 ;
+	.type __mul64 ,@function;
+__mul64:
+	/* This is a leaf routine, so it runs in the caller's register window:
+	   only %o0-%o5 and the scratch %g1 are written, never %l or %i.  */
+
+	/* Step 1: a1 * b1 as a full unsigned product -> %o5 (high), %g1 (low) */
+	mov	%o1, %y			! multiplier -> Y
+	nop				! a write to %y needs three instructions
+	nop				! before the first mulscc may read it
+	andcc	%g0, %g0, %o4		! zero the partial product and clear N and V
+
+	mulscc	%o4, %o3, %o4		! 1
+	mulscc	%o4, %o3, %o4		! 2
+	mulscc	%o4, %o3, %o4		! 3
+	mulscc	%o4, %o3, %o4		! 4
+	mulscc	%o4, %o3, %o4		! 5
+	mulscc	%o4, %o3, %o4		! 6
+	mulscc	%o4, %o3, %o4		! 7
+	mulscc	%o4, %o3, %o4		! 8
+	mulscc	%o4, %o3, %o4		! 9
+	mulscc	%o4, %o3, %o4		! 10
+	mulscc	%o4, %o3, %o4		! 11
+	mulscc	%o4, %o3, %o4		! 12
+	mulscc	%o4, %o3, %o4		! 13
+	mulscc	%o4, %o3, %o4		! 14
+	mulscc	%o4, %o3, %o4		! 15
+	mulscc	%o4, %o3, %o4		! 16
+	mulscc	%o4, %o3, %o4		! 17
+	mulscc	%o4, %o3, %o4		! 18
+	mulscc	%o4, %o3, %o4		! 19
+	mulscc	%o4, %o3, %o4		! 20
+	mulscc	%o4, %o3, %o4		! 21
+	mulscc	%o4, %o3, %o4		! 22
+	mulscc	%o4, %o3, %o4		! 23
+	mulscc	%o4, %o3, %o4		! 24
+	mulscc	%o4, %o3, %o4		! 25
+	mulscc	%o4, %o3, %o4		! 26
+	mulscc	%o4, %o3, %o4		! 27
+	mulscc	%o4, %o3, %o4		! 28
+	mulscc	%o4, %o3, %o4		! 29
+	mulscc	%o4, %o3, %o4		! 30
+	mulscc	%o4, %o3, %o4		! 31
+	mulscc	%o4, %o3, %o4		! 32
+	mulscc	%o4, %g0, %o4		! Final shift
+	mov	%y, %g1			! Y -> low word of the final result
+	/* mulscc treats the multiplicand %o3 as signed.  For an unsigned
+	   product, add the multiplier %o1 to the high word if bit 31 of %o3 is set.  */
+	sra	%o3, 31, %o5		! make mask from sign bit of %o3
+	and	%o1, %o5, %o5		! %o5 = 0 or %o1, depending on sign of %o3
+	add	%o4, %o5, %o5		! corrected high word of a1 * b1
+
+	/* Step 2: low word of a0 * b1 -> %o0 (a0 is dead once it is in %y) */
+	mov	%o0, %y			! multiplier -> Y
+	nop				! a write to %y needs three instructions
+	nop				! before the first mulscc may read it
+	andcc	%g0, %g0, %o4		! zero the partial product and clear N and V
+	mulscc	%o4, %o3, %o4		! 1
+	mulscc	%o4, %o3, %o4		! 2
+	mulscc	%o4, %o3, %o4		! 3
+	mulscc	%o4, %o3, %o4		! 4
+	mulscc	%o4, %o3, %o4		! 5
+	mulscc	%o4, %o3, %o4		! 6
+	mulscc	%o4, %o3, %o4		! 7
+	mulscc	%o4, %o3, %o4		! 8
+	mulscc	%o4, %o3, %o4		! 9
+	mulscc	%o4, %o3, %o4		! 10
+	mulscc	%o4, %o3, %o4		! 11
+	mulscc	%o4, %o3, %o4		! 12
+	mulscc	%o4, %o3, %o4		! 13
+	mulscc	%o4, %o3, %o4		! 14
+	mulscc	%o4, %o3, %o4		! 15
+	mulscc	%o4, %o3, %o4		! 16
+	mulscc	%o4, %o3, %o4		! 17
+	mulscc	%o4, %o3, %o4		! 18
+	mulscc	%o4, %o3, %o4		! 19
+	mulscc	%o4, %o3, %o4		! 20
+	mulscc	%o4, %o3, %o4		! 21
+	mulscc	%o4, %o3, %o4		! 22
+	mulscc	%o4, %o3, %o4		! 23
+	mulscc	%o4, %o3, %o4		! 24
+	mulscc	%o4, %o3, %o4		! 25
+	mulscc	%o4, %o3, %o4		! 26
+	mulscc	%o4, %o3, %o4		! 27
+	mulscc	%o4, %o3, %o4		! 28
+	mulscc	%o4, %o3, %o4		! 29
+	mulscc	%o4, %o3, %o4		! 30
+	mulscc	%o4, %o3, %o4		! 31
+	mulscc	%o4, %o3, %o4		! 32
+	mulscc	%o4, %g0, %o4		! Final shift
+	mov	%y, %o0			! Y -> low word of a0 * b1
+
+	/* Step 3: low word of a1 * b0 -> %o2 (b0 is dead once it is in %y) */
+	mov	%o2, %y			! multiplier -> Y
+	nop				! a write to %y needs three instructions
+	nop				! before the first mulscc may read it
+	andcc	%g0, %g0, %o4		! zero the partial product and clear N and V
+	mulscc	%o4, %o1, %o4		! 1
+	mulscc	%o4, %o1, %o4		! 2
+	mulscc	%o4, %o1, %o4		! 3
+	mulscc	%o4, %o1, %o4		! 4
+	mulscc	%o4, %o1, %o4		! 5
+	mulscc	%o4, %o1, %o4		! 6
+	mulscc	%o4, %o1, %o4		! 7
+	mulscc	%o4, %o1, %o4		! 8
+	mulscc	%o4, %o1, %o4		! 9
+	mulscc	%o4, %o1, %o4		! 10
+	mulscc	%o4, %o1, %o4		! 11
+	mulscc	%o4, %o1, %o4		! 12
+	mulscc	%o4, %o1, %o4		! 13
+	mulscc	%o4, %o1, %o4		! 14
+	mulscc	%o4, %o1, %o4		! 15
+	mulscc	%o4, %o1, %o4		! 16
+	mulscc	%o4, %o1, %o4		! 17
+	mulscc	%o4, %o1, %o4		! 18
+	mulscc	%o4, %o1, %o4		! 19
+	mulscc	%o4, %o1, %o4		! 20
+	mulscc	%o4, %o1, %o4		! 21
+	mulscc	%o4, %o1, %o4		! 22
+	mulscc	%o4, %o1, %o4		! 23
+	mulscc	%o4, %o1, %o4		! 24
+	mulscc	%o4, %o1, %o4		! 25
+	mulscc	%o4, %o1, %o4		! 26
+	mulscc	%o4, %o1, %o4		! 27
+	mulscc	%o4, %o1, %o4		! 28
+	mulscc	%o4, %o1, %o4		! 29
+	mulscc	%o4, %o1, %o4		! 30
+	mulscc	%o4, %o1, %o4		! 31
+	mulscc	%o4, %o1, %o4		! 32
+	mulscc	%o4, %g0, %o4		! Final shift
+	mov	%y, %o2			! Y -> low word of a1 * b0
+
+	/* Step 4: high result word = sum of the three contributions (mod 2^32) */
+	add	%o0, %o2, %o0		! low(a0 * b1) + low(a1 * b0)
+	add	%o0, %o5, %o0		! + high(a1 * b1)
+
+	retl
+	mov	%g1, %o1		! low value result (delay slot)
+
+.LLfe1:
+	.size __mul64,.LLfe1-__mul64
============================================================
Index: sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/div64.c
--- sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/div64.c created
+++ sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/div64.c Sat Oct 6 12:38:43 2001 1.1
@@ -0,0 +1,43 @@
+/* __div64:
+ Division of signed long long 's
+
+ Contributed by Gert Ohme (ohme@dialeasy.de)
+
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Return x / y truncated toward zero.  The magnitudes are formed in unsigned
+   arithmetic, so LLONG_MIN is handled without signed overflow.  Division by
+   zero is not diagnosed; the value returned for y == 0 is meaningless.  */
+long long
+__div64 ( long long x, long long y )
+{
+  unsigned long long a = x, b = y, res = 0, d = 1;
+
+  if (x < 0) a = -a;	/* Unsigned negation is defined even for LLONG_MIN.  */
+  if (y < 0) b = -b;
+  /* Align b under a; a <= 2^63, so b stops before its top bit is lost.  */
+  if (b > 0) while (b < a) b <<= 1, d <<= 1;
+  do /* Shift-and-subtract: d is the quotient bit b currently stands for.  */
+    {
+      if ( a >= b ) a -= b, res += d;
+      b >>= 1, d >>= 1;
+    } while (d);
+  /* Apply the sign while still unsigned; this wraps cleanly to LLONG_MIN.  */
+  return ((x < 0) == (y < 0)) ? (long long) res : (long long) (0ull - res);
+}
============================================================
Index: sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/rem64.c
--- sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/rem64.c created
+++ sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/rem64.c Sat Oct 6 12:42:43 2001 1.1
@@ -0,0 +1,41 @@
+/* __rem64:
+ Remainder of a division of signed long long 's
+
+ Contributed by Gert Ohme (ohme@dialeasy.de)
+
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Return the remainder of x / y, carrying the sign of x as C's % does.
+   Division by zero is not diagnosed; the result for y == 0 is meaningless.  */
+long long
+__rem64 ( long long x, long long y )
+{
+  unsigned long long a = x, b = y, d = 1;
+
+  if (x < 0) a = -a;	/* Unsigned negation is defined even for LLONG_MIN.  */
+  if (y < 0) b = -b;
+  /* Align b under a; a <= 2^63, so b stops before its top bit is lost.  */
+  if (b > 0) while (b < a) b <<= 1, d <<= 1;
+  do /* Take away every shifted divisor that fits; a ends as |x| % |y|.  */
+    {
+      if ( a >= b ) a -= b;
+      b >>= 1, d >>= 1;
+    } while (d);
+  return (x < 0) ? (long long) (0ull - a) : (long long) a;
+}
============================================================
Index: sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/udiv64.c
--- sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/udiv64.c created
+++ sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/udiv64.c Sat Oct 6 12:41:58 2001 1.1
@@ -0,0 +1,45 @@
+/* __udiv64:
+ Division of unsigned long long 's
+
+ Contributed by Gert Ohme (ohme@dialeasy.de)
+
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Return x / y for unsigned operands by binary shift-and-subtract.
+   Division by zero is not diagnosed; the result for y == 0 is meaningless.  */
+unsigned long long
+__udiv64 ( unsigned long long x, unsigned long long y )
+{
+  unsigned long long res = 0, d = 1;
+  unsigned long long e = 1ull << 63;	/* Unsigned: 1ll << 63 would overflow.  */
+
+  if (x == 0) return (0);
+
+  /* Find the highest set bit of x.  */
+  while ((x & e) == 0)
+    e >>= 1;
+  /* Raise y to that bit at most, so the shift can never drop a bit of y.  */
+  if (y > 0) while (y < e) y <<= 1, d <<= 1;
+  do /* d marks the quotient bit that the shifted y currently stands for.  */
+    {
+      if (x >= y) x -= y, res += d;
+      y >>= 1, d >>= 1;
+    } while (d);
+  return res;
+}
============================================================
Index: sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/urem64.c
--- sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/urem64.c created
+++ sysdeps/unix/sysv/sysv4/solaris2/sparc/sparc32/urem64.c Sat Oct 6 12:43:17 2001 1.1
@@ -0,0 +1,45 @@
+/* __urem64:
+ Remainder of a division of unsigned long long 's
+
+ Contributed by Gert Ohme (ohme@dialeasy.de)
+
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Return x % y for unsigned operands by binary shift-and-subtract.
+   Division by zero is not diagnosed; the result for y == 0 is meaningless.  */
+unsigned long long
+__urem64 ( unsigned long long x, unsigned long long y )
+{
+  unsigned long long d = 1;		/* Counts the alignment shifts of y.  */
+  unsigned long long e = 1ull << 63;	/* Unsigned: 1ll << 63 would overflow.  */
+
+  if (x == 0) return (0);
+
+  /* Find the highest set bit of x.  */
+  while ((x & e) == 0)
+    e >>= 1;
+  /* Raise y to that bit at most, so the shift can never drop a bit of y.  */
+  if (y > 0) while (y < e) y <<= 1, d <<= 1;
+  do /* Take away every shifted divisor that fits; x ends as the remainder.  */
+    {
+      if (x >= y) x -= y;
+      y >>= 1, d >>= 1;
+    } while (d);
+  return x;
+}
More information about the Libc-alpha
mailing list