]> sourceware.org Git - glibc.git/blame - soft-fp/op-2.h
Implement fma in soft-fp.
[glibc.git] / soft-fp / op-2.h
CommitLineData
d876f532
UD
1/* Software floating-point emulation.
2 Basic two-word fraction declaration and manipulation.
568035b7 3 Copyright (C) 1997-2013 Free Software Foundation, Inc.
d876f532
UD
4 This file is part of the GNU C Library.
5 Contributed by Richard Henderson (rth@cygnus.com),
6 Jakub Jelinek (jj@ultra.linux.cz),
7 David S. Miller (davem@redhat.com) and
8 Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
d876f532 14
638a783c
RM
15 In addition to the permissions in the GNU Lesser General Public
16 License, the Free Software Foundation gives you unlimited
17 permission to link the compiled version of this file into
18 combinations with other programs, and to distribute those
19 combinations without any restriction coming from the use of this
20 file. (The Lesser General Public License restrictions do apply in
21 other respects; for example, they cover modification of the file,
22 and distribution when not linked into a combine executable.)
23
d876f532
UD
24 The GNU C Library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 27 Lesser General Public License for more details.
d876f532 28
41bdb6e2 29 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
30 License along with the GNU C Library; if not, see
31 <http://www.gnu.org/licenses/>. */
d876f532
UD
32
33#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
34#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
35#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
36#define _FP_FRAC_HIGH_2(X) (X##_f1)
37#define _FP_FRAC_LOW_2(X) (X##_f0)
38#define _FP_FRAC_WORD_2(X,w) (X##_f##w)
39
fe0b1e85
RM
40#define _FP_FRAC_SLL_2(X,N) \
41(void)(((N) < _FP_W_TYPE_SIZE) \
42 ? ({ \
43 if (__builtin_constant_p(N) && (N) == 1) \
44 { \
45 X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
46 X##_f0 += X##_f0; \
47 } \
48 else \
49 { \
50 X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
51 X##_f0 <<= (N); \
52 } \
53 0; \
54 }) \
55 : ({ \
56 X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
57 X##_f0 = 0; \
58 }))
59
d876f532
UD
60
61#define _FP_FRAC_SRL_2(X,N) \
fe0b1e85
RM
62(void)(((N) < _FP_W_TYPE_SIZE) \
63 ? ({ \
64 X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
65 X##_f1 >>= (N); \
66 }) \
67 : ({ \
68 X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
69 X##_f1 = 0; \
70 }))
d876f532
UD
71
72/* Right shift with sticky-lsb. */
fe0b1e85
RM
73#define _FP_FRAC_SRST_2(X,S, N,sz) \
74(void)(((N) < _FP_W_TYPE_SIZE) \
75 ? ({ \
76 S = (__builtin_constant_p(N) && (N) == 1 \
77 ? X##_f0 & 1 \
78 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
79 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
80 X##_f1 >>= (N); \
81 }) \
82 : ({ \
83 S = ((((N) == _FP_W_TYPE_SIZE \
84 ? 0 \
85 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
86 | X##_f0) != 0); \
87 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
88 X##_f1 = 0; \
89 }))
90
91#define _FP_FRAC_SRS_2(X,N,sz) \
92(void)(((N) < _FP_W_TYPE_SIZE) \
93 ? ({ \
94 X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
95 (__builtin_constant_p(N) && (N) == 1 \
96 ? X##_f0 & 1 \
97 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
98 X##_f1 >>= (N); \
99 }) \
100 : ({ \
101 X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
102 ((((N) == _FP_W_TYPE_SIZE \
103 ? 0 \
104 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
105 | X##_f0) != 0)); \
106 X##_f1 = 0; \
107 }))
d876f532
UD
108
109#define _FP_FRAC_ADDI_2(X,I) \
110 __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
111
112#define _FP_FRAC_ADD_2(R,X,Y) \
113 __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
114
115#define _FP_FRAC_SUB_2(R,X,Y) \
116 __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
117
118#define _FP_FRAC_DEC_2(X,Y) \
119 __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
120
121#define _FP_FRAC_CLZ_2(R,X) \
122 do { \
123 if (X##_f1) \
124 __FP_CLZ(R,X##_f1); \
125 else \
126 { \
127 __FP_CLZ(R,X##_f0); \
128 R += _FP_W_TYPE_SIZE; \
129 } \
130 } while(0)
131
132/* Predicates */
133#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
134#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
135#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
cf299341 136#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
77f01ab5
JM
137#define _FP_FRAC_HIGHBIT_DW_2(fs,X) \
138 (_FP_FRAC_HIGH_DW_##fs(X) & _FP_HIGHBIT_DW_##fs)
d876f532
UD
139#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
140#define _FP_FRAC_GT_2(X, Y) \
fe0b1e85 141 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
d876f532 142#define _FP_FRAC_GE_2(X, Y) \
fe0b1e85 143 (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
d876f532
UD
144
145#define _FP_ZEROFRAC_2 0, 0
146#define _FP_MINFRAC_2 0, 1
147#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
148
149/*
9c84384c 150 * Internals
d876f532
UD
151 */
152
153#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
154
155#define __FP_CLZ_2(R, xh, xl) \
156 do { \
157 if (xh) \
158 __FP_CLZ(R,xh); \
159 else \
160 { \
161 __FP_CLZ(R,xl); \
162 R += _FP_W_TYPE_SIZE; \
163 } \
164 } while(0)
165
166#if 0
167
168#ifndef __FP_FRAC_ADDI_2
169#define __FP_FRAC_ADDI_2(xh, xl, i) \
170 (xh += ((xl += i) < i))
171#endif
172#ifndef __FP_FRAC_ADD_2
173#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
174 (rh = xh + yh + ((rl = xl + yl) < xl))
175#endif
176#ifndef __FP_FRAC_SUB_2
177#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
178 (rh = xh - yh - ((rl = xl - yl) > xl))
179#endif
180#ifndef __FP_FRAC_DEC_2
181#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
182 do { \
183 UWtype _t = xl; \
184 xh -= yh + ((xl -= yl) > _t); \
185 } while (0)
186#endif
187
188#else
189
190#undef __FP_FRAC_ADDI_2
191#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
192#undef __FP_FRAC_ADD_2
193#define __FP_FRAC_ADD_2 add_ssaaaa
194#undef __FP_FRAC_SUB_2
195#define __FP_FRAC_SUB_2 sub_ddmmss
196#undef __FP_FRAC_DEC_2
197#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
198
199#endif
200
201/*
202 * Unpack the raw bits of a native fp value. Do not classify or
203 * normalize the data.
204 */
205
206#define _FP_UNPACK_RAW_2(fs, X, val) \
207 do { \
208 union _FP_UNION_##fs _flo; _flo.flt = (val); \
209 \
210 X##_f0 = _flo.bits.frac0; \
211 X##_f1 = _flo.bits.frac1; \
212 X##_e = _flo.bits.exp; \
213 X##_s = _flo.bits.sign; \
214 } while (0)
215
216#define _FP_UNPACK_RAW_2_P(fs, X, val) \
217 do { \
218 union _FP_UNION_##fs *_flo = \
219 (union _FP_UNION_##fs *)(val); \
220 \
221 X##_f0 = _flo->bits.frac0; \
222 X##_f1 = _flo->bits.frac1; \
223 X##_e = _flo->bits.exp; \
224 X##_s = _flo->bits.sign; \
225 } while (0)
226
227
228/*
229 * Repack the raw bits of a native fp value.
230 */
231
232#define _FP_PACK_RAW_2(fs, val, X) \
233 do { \
234 union _FP_UNION_##fs _flo; \
235 \
236 _flo.bits.frac0 = X##_f0; \
237 _flo.bits.frac1 = X##_f1; \
238 _flo.bits.exp = X##_e; \
239 _flo.bits.sign = X##_s; \
240 \
241 (val) = _flo.flt; \
242 } while (0)
243
244#define _FP_PACK_RAW_2_P(fs, val, X) \
245 do { \
246 union _FP_UNION_##fs *_flo = \
247 (union _FP_UNION_##fs *)(val); \
248 \
249 _flo->bits.frac0 = X##_f0; \
250 _flo->bits.frac1 = X##_f1; \
251 _flo->bits.exp = X##_e; \
252 _flo->bits.sign = X##_s; \
253 } while (0)
254
255
256/*
257 * Multiplication algorithms:
258 */
259
260/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
261
77f01ab5 262#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
d876f532 263 do { \
77f01ab5 264 _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
d876f532 265 \
77f01ab5 266 doit(_FP_FRAC_WORD_4(R,1), _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0); \
d876f532
UD
267 doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
268 doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
77f01ab5 269 doit(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2), X##_f1, Y##_f1); \
d876f532 270 \
77f01ab5
JM
271 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
272 _FP_FRAC_WORD_4(R,1), 0, _b_f1, _b_f0, \
273 _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
274 _FP_FRAC_WORD_4(R,1)); \
275 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
276 _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0, \
277 _FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
278 _FP_FRAC_WORD_4(R,1)); \
279 } while (0)
280
281#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
282 do { \
283 _FP_FRAC_DECL_4(_z); \
284 \
285 _FP_MUL_MEAT_DW_2_wide(wfracbits, _z, X, Y, doit); \
d876f532
UD
286 \
287 /* Normalize since we know where the msb of the multiplicands \
288 were (bit B), we know that the msb of the of the product is \
289 at either 2B or 2B-1. */ \
290 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
291 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
292 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
293 } while (0)
294
295/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
296 Do only 3 multiplications instead of four. This one is for machines
297 where multiplication is much more expensive than subtraction. */
298
77f01ab5 299#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
d876f532 300 do { \
77f01ab5 301 _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
d876f532
UD
302 _FP_W_TYPE _d; \
303 int _c1, _c2; \
304 \
305 _b_f0 = X##_f0 + X##_f1; \
306 _c1 = _b_f0 < X##_f0; \
307 _b_f1 = Y##_f0 + Y##_f1; \
308 _c2 = _b_f1 < Y##_f0; \
77f01ab5
JM
309 doit(_d, _FP_FRAC_WORD_4(R,0), X##_f0, Y##_f0); \
310 doit(_FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1), _b_f0, _b_f1); \
d876f532
UD
311 doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
312 \
313 _b_f0 &= -_c2; \
314 _b_f1 &= -_c1; \
77f01ab5
JM
315 __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
316 _FP_FRAC_WORD_4(R,1), (_c1 & _c2), 0, _d, \
317 0, _FP_FRAC_WORD_4(R,2), _FP_FRAC_WORD_4(R,1)); \
318 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
d876f532 319 _b_f0); \
77f01ab5 320 __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
d876f532 321 _b_f1); \
77f01ab5
JM
322 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
323 _FP_FRAC_WORD_4(R,1), \
324 0, _d, _FP_FRAC_WORD_4(R,0)); \
325 __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(R,3),_FP_FRAC_WORD_4(R,2), \
326 _FP_FRAC_WORD_4(R,1), 0, _c_f1, _c_f0); \
327 __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2), \
d876f532 328 _c_f1, _c_f0, \
77f01ab5
JM
329 _FP_FRAC_WORD_4(R,3), _FP_FRAC_WORD_4(R,2)); \
330 } while (0)
331
332#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
333 do { \
334 _FP_FRAC_DECL_4(_z); \
335 \
336 _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, _z, X, Y, doit); \
d876f532
UD
337 \
338 /* Normalize since we know where the msb of the multiplicands \
339 were (bit B), we know that the msb of the of the product is \
340 at either 2B or 2B-1. */ \
341 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
342 R##_f0 = _FP_FRAC_WORD_4(_z,0); \
343 R##_f1 = _FP_FRAC_WORD_4(_z,1); \
344 } while (0)
345
77f01ab5 346#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
d876f532 347 do { \
d876f532
UD
348 _FP_W_TYPE _x[2], _y[2]; \
349 _x[0] = X##_f0; _x[1] = X##_f1; \
350 _y[0] = Y##_f0; _y[1] = Y##_f1; \
351 \
77f01ab5
JM
352 mpn_mul_n(R##_f, _x, _y, 2); \
353 } while (0)
354
355#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
356 do { \
357 _FP_FRAC_DECL_4(_z); \
358 \
359 _FP_MUL_MEAT_DW_2_gmp(wfracbits, _z, X, Y); \
d876f532
UD
360 \
361 /* Normalize since we know where the msb of the multiplicands \
362 were (bit B), we know that the msb of the of the product is \
363 at either 2B or 2B-1. */ \
364 _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
365 R##_f0 = _z_f[0]; \
366 R##_f1 = _z_f[1]; \
367 } while (0)
368
369/* Do at most 120x120=240 bits multiplication using double floating
370 point multiplication. This is useful if floating point
371 multiplication has much bigger throughput than integer multiply.
372 It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
9c84384c 373 between 106 and 120 only.
d876f532
UD
374 Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
375 SETFETZ is a macro which will disable all FPU exceptions and set rounding
376 towards zero, RESETFE should optionally reset it back. */
377
378#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
379 do { \
380 static const double _const[] = { \
381 /* 2^-24 */ 5.9604644775390625e-08, \
382 /* 2^-48 */ 3.5527136788005009e-15, \
383 /* 2^-72 */ 2.1175823681357508e-22, \
384 /* 2^-96 */ 1.2621774483536189e-29, \
385 /* 2^28 */ 2.68435456e+08, \
386 /* 2^4 */ 1.600000e+01, \
387 /* 2^-20 */ 9.5367431640625e-07, \
388 /* 2^-44 */ 5.6843418860808015e-14, \
389 /* 2^-68 */ 3.3881317890172014e-21, \
390 /* 2^-92 */ 2.0194839173657902e-28, \
391 /* 2^-116 */ 1.2037062152420224e-35}; \
392 double _a240, _b240, _c240, _d240, _e240, _f240, \
393 _g240, _h240, _i240, _j240, _k240; \
394 union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
395 _p240, _q240, _r240, _s240; \
396 UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
397 \
398 if (wfracbits < 106 || wfracbits > 120) \
399 abort(); \
400 \
401 setfetz; \
402 \
403 _e240 = (double)(long)(X##_f0 & 0xffffff); \
404 _j240 = (double)(long)(Y##_f0 & 0xffffff); \
405 _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
406 _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
407 _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
408 _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
409 _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
410 _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
411 _a240 = (double)(long)(X##_f1 >> 32); \
412 _f240 = (double)(long)(Y##_f1 >> 32); \
413 _e240 *= _const[3]; \
414 _j240 *= _const[3]; \
415 _d240 *= _const[2]; \
416 _i240 *= _const[2]; \
417 _c240 *= _const[1]; \
418 _h240 *= _const[1]; \
419 _b240 *= _const[0]; \
420 _g240 *= _const[0]; \
421 _s240.d = _e240*_j240;\
422 _r240.d = _d240*_j240 + _e240*_i240;\
423 _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
424 _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
425 _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
426 _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
427 _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
428 _l240.d = _a240*_g240 + _b240*_f240; \
429 _k240 = _a240*_f240; \
430 _r240.d += _s240.d; \
431 _q240.d += _r240.d; \
432 _p240.d += _q240.d; \
433 _o240.d += _p240.d; \
434 _n240.d += _o240.d; \
435 _m240.d += _n240.d; \
436 _l240.d += _m240.d; \
437 _k240 += _l240.d; \
438 _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
439 _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
440 _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
441 _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
442 _o240.d += _const[7]; \
443 _n240.d += _const[6]; \
444 _m240.d += _const[5]; \
445 _l240.d += _const[4]; \
446 if (_s240.d != 0.0) _y240 = 1; \
447 if (_r240.d != 0.0) _y240 = 1; \
448 if (_q240.d != 0.0) _y240 = 1; \
449 if (_p240.d != 0.0) _y240 = 1; \
450 _t240 = (DItype)_k240; \
451 _u240 = _l240.i; \
452 _v240 = _m240.i; \
453 _w240 = _n240.i; \
454 _x240 = _o240.i; \
455 R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
456 | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
457 R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
350635a5
OB
458 | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
459 | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
460 | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
461 | _y240; \
d876f532
UD
462 resetfe; \
463 } while (0)
464
465/*
466 * Division algorithms:
467 */
468
469#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
470 do { \
471 _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
472 if (_FP_FRAC_GT_2(X, Y)) \
473 { \
474 _n_f2 = X##_f1 >> 1; \
475 _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
476 _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
477 } \
478 else \
479 { \
480 R##_e--; \
481 _n_f2 = X##_f1; \
482 _n_f1 = X##_f0; \
483 _n_f0 = 0; \
484 } \
485 \
486 /* Normalize, i.e. make the most significant bit of the \
487 denominator set. */ \
488 _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
489 \
490 udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
491 umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
492 _r_f0 = _n_f0; \
493 if (_FP_FRAC_GT_2(_m, _r)) \
494 { \
495 R##_f1--; \
496 _FP_FRAC_ADD_2(_r, Y, _r); \
497 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
498 { \
499 R##_f1--; \
500 _FP_FRAC_ADD_2(_r, Y, _r); \
501 } \
502 } \
503 _FP_FRAC_DEC_2(_r, _m); \
504 \
505 if (_r_f1 == Y##_f1) \
506 { \
507 /* This is a special case, not an optimization \
508 (_r/Y##_f1 would not fit into UWtype). \
509 As _r is guaranteed to be < Y, R##_f0 can be either \
510 (UWtype)-1 or (UWtype)-2. But as we know what kind \
511 of bits it is (sticky, guard, round), we don't care. \
512 We also don't care what the reminder is, because the \
513 guard bit will be set anyway. -jj */ \
514 R##_f0 = -1; \
515 } \
516 else \
517 { \
518 udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
519 umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
520 _r_f0 = 0; \
521 if (_FP_FRAC_GT_2(_m, _r)) \
522 { \
523 R##_f0--; \
524 _FP_FRAC_ADD_2(_r, Y, _r); \
525 if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
526 { \
527 R##_f0--; \
528 _FP_FRAC_ADD_2(_r, Y, _r); \
529 } \
530 } \
531 if (!_FP_FRAC_EQ_2(_r, _m)) \
532 R##_f0 |= _FP_WORK_STICKY; \
533 } \
534 } while (0)
535
536
537#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
538 do { \
539 _FP_W_TYPE _x[4], _y[2], _z[4]; \
540 _y[0] = Y##_f0; _y[1] = Y##_f1; \
541 _x[0] = _x[3] = 0; \
542 if (_FP_FRAC_GT_2(X, Y)) \
543 { \
544 R##_e++; \
545 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
546 X##_f1 >> (_FP_W_TYPE_SIZE - \
547 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
548 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
549 } \
550 else \
551 { \
552 _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
553 X##_f1 >> (_FP_W_TYPE_SIZE - \
554 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
555 _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
556 } \
557 \
558 (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
559 R##_f1 = _z[1]; \
560 R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
561 } while (0)
562
563
564/*
565 * Square root algorithms:
566 * We have just one right now, maybe Newton approximation
567 * should be added for those machines where division is fast.
568 */
9c84384c 569
d876f532
UD
570#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
571 do { \
572 while (q) \
573 { \
574 T##_f1 = S##_f1 + q; \
575 if (T##_f1 <= X##_f1) \
576 { \
577 S##_f1 = T##_f1 + q; \
578 X##_f1 -= T##_f1; \
579 R##_f1 += q; \
580 } \
581 _FP_FRAC_SLL_2(X, 1); \
582 q >>= 1; \
583 } \
584 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
585 while (q != _FP_WORK_ROUND) \
586 { \
587 T##_f0 = S##_f0 + q; \
588 T##_f1 = S##_f1; \
589 if (T##_f1 < X##_f1 || \
590 (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
591 { \
592 S##_f0 = T##_f0 + q; \
593 S##_f1 += (T##_f0 > S##_f0); \
594 _FP_FRAC_DEC_2(X, T); \
595 R##_f0 += q; \
596 } \
597 _FP_FRAC_SLL_2(X, 1); \
598 q >>= 1; \
599 } \
600 if (X##_f0 | X##_f1) \
601 { \
602 if (S##_f1 < X##_f1 || \
603 (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
604 R##_f0 |= _FP_WORK_ROUND; \
605 R##_f0 |= _FP_WORK_STICKY; \
606 } \
607 } while (0)
608
609
610/*
9c84384c 611 * Assembly/disassembly for converting to/from integral types.
d876f532
UD
612 * No shifting or overflow handled here.
613 */
614
615#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
fe0b1e85
RM
616(void)((rsize <= _FP_W_TYPE_SIZE) \
617 ? ({ r = X##_f0; }) \
618 : ({ \
619 r = X##_f1; \
620 r <<= _FP_W_TYPE_SIZE; \
621 r += X##_f0; \
622 }))
d876f532
UD
623
624#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
625 do { \
626 X##_f0 = r; \
627 X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
628 } while (0)
629
630/*
631 * Convert FP values between word sizes
632 */
633
fe0b1e85 634#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
d876f532 635
fe0b1e85 636#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
37002cbc
JJ
637
638#define _FP_FRAC_COPY_2_2(D,S) _FP_FRAC_COPY_2(D,S)
This page took 0.42857 seconds and 5 git commands to generate.