]>
Commit | Line | Data |
---|---|---|
6bc31da0 | 1 | /* ix87 specific implementation of pow function. |
dff8da6b | 2 | Copyright (C) 1996-2024 Free Software Foundation, Inc. |
6bc31da0 | 3 | This file is part of the GNU C Library. |
6bc31da0 UD |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
6bc31da0 UD |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 13 | Lesser General Public License for more details. |
6bc31da0 | 14 | |
41bdb6e2 | 15 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
6bc31da0 UD |
18 | |
19 | #include <machine/asm.h> | |
6f0f237b | 20 | #include <i386-math-asm.h> |
220622dd | 21 | #include <libm-alias-finite.h> |
6bc31da0 | 22 | |
0ac5ae23 | 23 | .section .rodata.cst8,"aM",@progbits,8 |
622c86f4 | 24 | |
0ac5ae23 | 25 | .p2align 3 |
b67e9372 | 26 | .type one,@object |
0ac5ae23 UD |
27 | one: .double 1.0 |
28 | ASM_SIZE_DIRECTIVE(one) | |
b67e9372 | 29 | .type limit,@object |
0ac5ae23 UD |
30 | limit: .double 0.29 |
31 | ASM_SIZE_DIRECTIVE(limit) | |
b67e9372 | 32 | .type p63,@object |
0ac5ae23 UD |
33 | p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 |
34 | ASM_SIZE_DIRECTIVE(p63) | |
b67e9372 | 35 | .type p10,@object |
c483f6b4 JM |
36 | p10: .byte 0, 0, 0, 0, 0, 0, 0x90, 0x40 |
37 | ASM_SIZE_DIRECTIVE(p10) | |
6bc31da0 | 38 | |
0ac5ae23 | 39 | .section .rodata.cst16,"aM",@progbits,16 |
622c86f4 | 40 | |
0ac5ae23 | 41 | .p2align 3 |
b67e9372 | 42 | .type infinity,@object |
0d8733c4 UD |
43 | inf_zero: |
44 | infinity: | |
45 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f | |
46 | ASM_SIZE_DIRECTIVE(infinity) | |
b67e9372 | 47 | .type zero,@object |
0d8733c4 UD |
48 | zero: .double 0.0 |
49 | ASM_SIZE_DIRECTIVE(zero) | |
b67e9372 | 50 | .type minf_mzero,@object |
0d8733c4 UD |
51 | minf_mzero: |
52 | minfinity: | |
53 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff | |
54 | mzero: | |
55 | .byte 0, 0, 0, 0, 0, 0, 0, 0x80 | |
56 | ASM_SIZE_DIRECTIVE(minf_mzero) | |
6ace3938 | 57 | DEFINE_DBL_MIN |
6bc31da0 UD |
58 | |
59 | #ifdef PIC | |
0ac5ae23 UD |
60 | # define MO(op) op##@GOTOFF(%ecx) |
61 | # define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) | |
6bc31da0 | 62 | #else |
0ac5ae23 UD |
63 | # define MO(op) op |
64 | # define MOX(op,x,f) op(,x,f) | |
6bc31da0 UD |
65 | #endif |
66 | ||
67 | .text | |
68 | ENTRY(__ieee754_pow) | |
0d8733c4 UD |
69 | fldl 12(%esp) // y |
70 | fxam | |
e61abf83 UD |
71 | |
72 | #ifdef PIC | |
fee732e5 | 73 | LOAD_PIC_REG (cx) |
e61abf83 UD |
74 | #endif |
75 | ||
0d8733c4 UD |
76 | fnstsw |
77 | movb %ah, %dl | |
78 | andb $0x45, %ah | |
79 | cmpb $0x40, %ah // is y == 0 ? | |
80 | je 11f | |
81 | ||
5afe4c0d | 82 | cmpb $0x05, %ah // is y == ±inf ? |
0d8733c4 UD |
83 | je 12f |
84 | ||
85 | cmpb $0x01, %ah // is y == NaN ? | |
86 | je 30f | |
6bc31da0 | 87 | |
0d8733c4 UD |
88 | fldl 4(%esp) // x : y |
89 | ||
6bc31da0 | 90 | subl $8,%esp |
fee732e5 | 91 | cfi_adjust_cfa_offset (8) |
6bc31da0 | 92 | |
0d8733c4 UD |
93 | fxam |
94 | fnstsw | |
95 | movb %ah, %dh | |
96 | andb $0x45, %ah | |
97 | cmpb $0x40, %ah | |
5afe4c0d | 98 | je 20f // x is ±0 |
0d8733c4 UD |
99 | |
100 | cmpb $0x05, %ah | |
5afe4c0d | 101 | je 15f // x is ±inf |
0d8733c4 | 102 | |
66294491 JM |
103 | cmpb $0x01, %ah |
104 | je 32f // x is NaN | |
105 | ||
0d8733c4 UD |
106 | fxch // y : x |
107 | ||
164f863e UD |
108 | /* fistpll raises invalid exception for |y| >= 1L<<63. */ |
109 | fld %st // y : y : x | |
110 | fabs // |y| : y : x | |
111 | fcompl MO(p63) // y : x | |
112 | fnstsw | |
113 | sahf | |
114 | jnc 2f | |
115 | ||
6bc31da0 UD |
116 | /* First see whether `y' is a natural number. In this case we |
117 | can use a more precise algorithm. */ | |
118 | fld %st // y : y : x | |
119 | fistpll (%esp) // y : x | |
120 | fildll (%esp) // int(y) : y : x | |
121 | fucomp %st(1) // y : x | |
122 | fnstsw | |
123 | sahf | |
d6270972 | 124 | jne 3f |
6bc31da0 | 125 | |
c483f6b4 JM |
126 | /* OK, we have an integer value for y. If large enough that |
127 | errors may propagate out of the 11 bits excess precision, use | |
128 | the algorithm for real exponent instead. */ | |
129 | fld %st // y : y : x | |
130 | fabs // |y| : y : x | |
131 | fcompl MO(p10) // y : x | |
132 | fnstsw | |
133 | sahf | |
134 | jnc 2f | |
6bc31da0 | 135 | popl %eax |
fee732e5 | 136 | cfi_adjust_cfa_offset (-4) |
6bc31da0 | 137 | popl %edx |
fee732e5 | 138 | cfi_adjust_cfa_offset (-4) |
0d8733c4 UD |
139 | orl $0, %edx |
140 | fstp %st(0) // x | |
141 | jns 4f // y >= 0, jump | |
6bc31da0 UD |
142 | fdivrl MO(one) // 1/x (now referred to as x) |
143 | negl %eax | |
144 | adcl $0, %edx | |
145 | negl %edx | |
146 | 4: fldl MO(one) // 1 : x | |
147 | fxch | |
148 | ||
4da6db51 JM |
149 | /* If y is even, take the absolute value of x. Otherwise, |
150 | ensure all intermediate values that might overflow have the | |
151 | sign of x. */ | |
152 | testb $1, %al | |
153 | jnz 6f | |
154 | fabs | |
155 | ||
6bc31da0 UD |
156 | 6: shrdl $1, %edx, %eax |
157 | jnc 5f | |
158 | fxch | |
4da6db51 | 159 | fabs |
6bc31da0 UD |
160 | fmul %st(1) // x : ST*x |
161 | fxch | |
4da6db51 JM |
162 | 5: fld %st // x : x : ST*x |
163 | fabs // |x| : x : ST*x | |
164 | fmulp // |x|*x : ST*x | |
b22fc5f5 | 165 | shrl $1, %edx |
6bc31da0 UD |
166 | movl %eax, %ecx |
167 | orl %edx, %ecx | |
168 | jnz 6b | |
169 | fstp %st(0) // ST*x | |
6ace3938 JM |
170 | #ifdef PIC |
171 | LOAD_PIC_REG (cx) | |
172 | #endif | |
173 | DBL_NARROW_EVAL_UFLOW_NONNAN | |
6571c570 UD |
174 | ret |
175 | ||
5afe4c0d | 176 | /* y is ±NAN */ |
6571c570 UD |
177 | 30: fldl 4(%esp) // x : y |
178 | fldl MO(one) // 1.0 : x : y | |
179 | fucomp %st(1) // x : y | |
180 | fnstsw | |
181 | sahf | |
182 | je 31f | |
183 | fxch // y : x | |
184 | 31: fstp %st(1) | |
185 | ret | |
6bc31da0 | 186 | |
66294491 JM |
187 | cfi_adjust_cfa_offset (8) |
188 | 32: addl $8, %esp | |
189 | cfi_adjust_cfa_offset (-8) | |
190 | fstp %st(1) | |
191 | ret | |
192 | ||
fee732e5 | 193 | cfi_adjust_cfa_offset (8) |
6bc31da0 | 194 | .align ALIGNARG(4) |
c483f6b4 JM |
195 | 2: // y is a large integer (absolute value at least 1L<<10), but |
196 | // may be odd unless at least 1L<<64. So it may be necessary | |
197 | // to adjust the sign of a negative result afterwards. | |
d6270972 JM |
198 | fxch // x : y |
199 | fabs // |x| : y | |
200 | fxch // y : x | |
201 | .align ALIGNARG(4) | |
202 | 3: /* y is a real number. */ | |
6bc31da0 UD |
203 | fxch // x : y |
204 | fldl MO(one) // 1.0 : x : y | |
8f3edfee UD |
205 | fldl MO(limit) // 0.29 : 1.0 : x : y |
206 | fld %st(2) // x : 0.29 : 1.0 : x : y | |
207 | fsub %st(2) // x-1 : 0.29 : 1.0 : x : y | |
208 | fabs // |x-1| : 0.29 : 1.0 : x : y | |
209 | fucompp // 1.0 : x : y | |
6bc31da0 UD |
210 | fnstsw |
211 | fxch // x : 1.0 : y | |
212 | sahf | |
213 | ja 7f | |
214 | fsub %st(1) // x-1 : 1.0 : y | |
215 | fyl2xp1 // log2(x) : y | |
216 | jmp 8f | |
217 | ||
218 | 7: fyl2x // log2(x) : y | |
219 | 8: fmul %st(1) // y*log2(x) : y | |
220 | fst %st(1) // y*log2(x) : y*log2(x) | |
221 | frndint // int(y*log2(x)) : y*log2(x) | |
222 | fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) | |
223 | fxch // fract(y*log2(x)) : int(y*log2(x)) | |
224 | f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) | |
225 | faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) | |
4da6db51 JM |
226 | |
227 | // Before scaling, we must negate if x is negative and y is an | |
228 | // odd integer. | |
c483f6b4 | 229 | testb $2, %dh |
4da6db51 | 230 | jz 291f |
c483f6b4 | 231 | // x is negative. If y is an odd integer, negate the result. |
4da6db51 JM |
232 | fldl 20(%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x)) |
233 | fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x)) | |
234 | fabs // |y| : y : 2^fract(y*log2(x)) : int(y*log2(x)) | |
235 | fcompl MO(p63) // y : 2^fract(y*log2(x)) : int(y*log2(x)) | |
c483f6b4 JM |
236 | fnstsw |
237 | sahf | |
4da6db51 | 238 | jnc 290f |
c483f6b4 JM |
239 | |
240 | // We must find out whether y is an odd integer. | |
4da6db51 JM |
241 | fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x)) |
242 | fistpll (%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x)) | |
243 | fildll (%esp) // int(y) : y : 2^fract(y*log2(x)) : int(y*log2(x)) | |
244 | fucompp // 2^fract(y*log2(x)) : int(y*log2(x)) | |
c483f6b4 JM |
245 | fnstsw |
246 | sahf | |
4da6db51 | 247 | jne 291f |
c483f6b4 JM |
248 | |
249 | // OK, the value is an integer, but is it odd? | |
250 | popl %eax | |
251 | cfi_adjust_cfa_offset (-4) | |
252 | popl %edx | |
253 | cfi_adjust_cfa_offset (-4) | |
254 | andb $1, %al | |
4da6db51 | 255 | jz 292f // jump if not odd |
c483f6b4 JM |
256 | // It's an odd integer. |
257 | fchs | |
4da6db51 JM |
258 | jmp 292f |
259 | ||
c483f6b4 | 260 | cfi_adjust_cfa_offset (8) |
4da6db51 JM |
261 | 290: fstp %st(0) // 2^fract(y*log2(x)) : int(y*log2(x)) |
262 | 291: addl $8, %esp | |
c483f6b4 | 263 | cfi_adjust_cfa_offset (-8) |
4da6db51 JM |
264 | 292: fscale // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) |
265 | fstp %st(1) // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) | |
6ace3938 | 266 | DBL_NARROW_EVAL_UFLOW_NONNAN |
6bc31da0 | 267 | ret |
0d8733c4 UD |
268 | |
269 | ||
5afe4c0d | 270 | // pow(x,±0) = 1 |
0d8733c4 UD |
271 | .align ALIGNARG(4) |
272 | 11: fstp %st(0) // pop y | |
273 | fldl MO(one) | |
274 | ret | |
275 | ||
5afe4c0d | 276 | // y == ±inf |
0d8733c4 UD |
277 | .align ALIGNARG(4) |
278 | 12: fstp %st(0) // pop y | |
8f3edfee UD |
279 | fldl MO(one) // 1 |
280 | fldl 4(%esp) // x : 1 | |
281 | fabs // abs(x) : 1 | |
282 | fucompp // < 1, == 1, or > 1 | |
0d8733c4 UD |
283 | fnstsw |
284 | andb $0x45, %ah | |
285 | cmpb $0x45, %ah | |
286 | je 13f // jump if x is NaN | |
287 | ||
288 | cmpb $0x40, %ah | |
289 | je 14f // jump if |x| == 1 | |
290 | ||
291 | shlb $1, %ah | |
292 | xorb %ah, %dl | |
293 | andl $2, %edx | |
294 | fldl MOX(inf_zero, %edx, 4) | |
295 | ret | |
296 | ||
297 | .align ALIGNARG(4) | |
6571c570 | 298 | 14: fldl MO(one) |
0d8733c4 UD |
299 | ret |
300 | ||
301 | .align ALIGNARG(4) | |
302 | 13: fldl 4(%esp) // load x == NaN | |
303 | ret | |
304 | ||
fee732e5 | 305 | cfi_adjust_cfa_offset (8) |
0d8733c4 | 306 | .align ALIGNARG(4) |
5afe4c0d | 307 | // x is ±inf |
0d8733c4 UD |
308 | 15: fstp %st(0) // y |
309 | testb $2, %dh | |
310 | jz 16f // jump if x == +inf | |
311 | ||
2460d3aa JM |
312 | // fistpll raises invalid exception for |y| >= 1L<<63, so test |
313 | // that (in which case y is certainly even) before testing | |
314 | // whether y is odd. | |
315 | fld %st // y : y | |
316 | fabs // |y| : y | |
317 | fcompl MO(p63) // y | |
318 | fnstsw | |
319 | sahf | |
320 | jnc 16f | |
321 | ||
0d8733c4 UD |
322 | // We must find out whether y is an odd integer. |
323 | fld %st // y : y | |
324 | fistpll (%esp) // y | |
325 | fildll (%esp) // int(y) : y | |
326 | fucompp // <empty> | |
327 | fnstsw | |
328 | sahf | |
329 | jne 17f | |
330 | ||
2460d3aa | 331 | // OK, the value is an integer. |
0d8733c4 | 332 | popl %eax |
fee732e5 | 333 | cfi_adjust_cfa_offset (-4) |
0d8733c4 | 334 | popl %edx |
fee732e5 | 335 | cfi_adjust_cfa_offset (-4) |
0d8733c4 UD |
336 | andb $1, %al |
337 | jz 18f // jump if not odd | |
0d8733c4 UD |
338 | // It's an odd integer. |
339 | shrl $31, %edx | |
340 | fldl MOX(minf_mzero, %edx, 8) | |
341 | ret | |
342 | ||
fee732e5 | 343 | cfi_adjust_cfa_offset (8) |
0d8733c4 UD |
344 | .align ALIGNARG(4) |
345 | 16: fcompl MO(zero) | |
346 | addl $8, %esp | |
fee732e5 | 347 | cfi_adjust_cfa_offset (-8) |
0d8733c4 UD |
348 | fnstsw |
349 | shrl $5, %eax | |
350 | andl $8, %eax | |
351 | fldl MOX(inf_zero, %eax, 1) | |
352 | ret | |
353 | ||
fee732e5 | 354 | cfi_adjust_cfa_offset (8) |
0d8733c4 UD |
355 | .align ALIGNARG(4) |
356 | 17: shll $30, %edx // sign bit for y in right position | |
357 | addl $8, %esp | |
fee732e5 | 358 | cfi_adjust_cfa_offset (-8) |
0d8733c4 UD |
359 | 18: shrl $31, %edx |
360 | fldl MOX(inf_zero, %edx, 8) | |
361 | ret | |
362 | ||
fee732e5 | 363 | cfi_adjust_cfa_offset (8) |
0d8733c4 | 364 | .align ALIGNARG(4) |
5afe4c0d | 365 | // x is ±0 |
0d8733c4 UD |
366 | 20: fstp %st(0) // y |
367 | testb $2, %dl | |
368 | jz 21f // y > 0 | |
369 | ||
5afe4c0d | 370 | // x is ±0 and y is < 0. We must find out whether y is an odd integer. |
0d8733c4 UD |
371 | testb $2, %dh |
372 | jz 25f | |
373 | ||
2460d3aa JM |
374 | // fistpll raises invalid exception for |y| >= 1L<<63, so test |
375 | // that (in which case y is certainly even) before testing | |
376 | // whether y is odd. | |
377 | fld %st // y : y | |
378 | fabs // |y| : y | |
379 | fcompl MO(p63) // y | |
380 | fnstsw | |
381 | sahf | |
382 | jnc 25f | |
383 | ||
0d8733c4 UD |
384 | fld %st // y : y |
385 | fistpll (%esp) // y | |
386 | fildll (%esp) // int(y) : y | |
387 | fucompp // <empty> | |
388 | fnstsw | |
389 | sahf | |
390 | jne 26f | |
391 | ||
2460d3aa | 392 | // OK, the value is an integer. |
0d8733c4 | 393 | popl %eax |
fee732e5 | 394 | cfi_adjust_cfa_offset (-4) |
0d8733c4 | 395 | popl %edx |
fee732e5 | 396 | cfi_adjust_cfa_offset (-4) |
0d8733c4 UD |
397 | andb $1, %al |
398 | jz 27f // jump if not odd | |
0d8733c4 UD |
399 | // It's an odd integer. |
400 | // Raise divide-by-zero exception and get minus infinity value. | |
401 | fldl MO(one) | |
402 | fdivl MO(zero) | |
403 | fchs | |
404 | ret | |
405 | ||
fee732e5 | 406 | cfi_adjust_cfa_offset (8) |
0d8733c4 | 407 | 25: fstp %st(0) |
aff6dc6c | 408 | 26: addl $8, %esp |
fee732e5 | 409 | cfi_adjust_cfa_offset (-8) |
0d8733c4 UD |
410 | 27: // Raise divide-by-zero exception and get infinity value. |
411 | fldl MO(one) | |
412 | fdivl MO(zero) | |
413 | ret | |
414 | ||
fee732e5 | 415 | cfi_adjust_cfa_offset (8) |
0d8733c4 | 416 | .align ALIGNARG(4) |
5afe4c0d | 417 | // x is ±0 and y is > 0. We must find out whether y is an odd integer. |
0d8733c4 UD |
418 | 21: testb $2, %dh |
419 | jz 22f | |
420 | ||
2460d3aa JM |
421 | // fistpll raises invalid exception for |y| >= 1L<<63, so test |
422 | // that (in which case y is certainly even) before testing | |
423 | // whether y is odd. | |
424 | fcoml MO(p63) // y | |
425 | fnstsw | |
426 | sahf | |
427 | jnc 22f | |
428 | ||
0d8733c4 UD |
429 | fld %st // y : y |
430 | fistpll (%esp) // y | |
431 | fildll (%esp) // int(y) : y | |
432 | fucompp // <empty> | |
433 | fnstsw | |
434 | sahf | |
435 | jne 23f | |
436 | ||
2460d3aa | 437 | // OK, the value is an integer. |
0d8733c4 | 438 | popl %eax |
fee732e5 | 439 | cfi_adjust_cfa_offset (-4) |
0d8733c4 | 440 | popl %edx |
fee732e5 | 441 | cfi_adjust_cfa_offset (-4) |
0d8733c4 UD |
442 | andb $1, %al |
443 | jz 24f // jump if not odd | |
0d8733c4 UD |
444 | // It's an odd integer. |
445 | fldl MO(mzero) | |
446 | ret | |
447 | ||
fee732e5 | 448 | cfi_adjust_cfa_offset (8) |
0d8733c4 | 449 | 22: fstp %st(0) |
899a2827 | 450 | 23: addl $8, %esp // Don't use 2 x pop |
fee732e5 | 451 | cfi_adjust_cfa_offset (-8) |
0d8733c4 UD |
452 | 24: fldl MO(zero) |
453 | ret | |
454 | ||
6bc31da0 | 455 | END(__ieee754_pow) |
220622dd | 456 | libm_alias_finite (__ieee754_pow, __pow) |