Root/lib/mpi/longlong.h

1/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2 * Note: I added some stuff for use with gnupg
3 *
4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
5 * 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
6 *
7 * This file is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Library General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version.
11 *
12 * This file is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 * License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this file; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 * MA 02111-1307, USA. */
21
22#include <asm-generic/bitops/count_zeros.h>
23
/* You have to define the following before including this file:
 *
 * UWtype -- An unsigned type, default type for operations (typically a "word")
 * UHWtype -- An unsigned type, at least half the size of UWtype.
 * UDWtype -- An unsigned type, at least twice as large as UWtype.
 * W_TYPE_SIZE -- size in bits of UWtype
 *
 * SItype, USItype -- Signed and unsigned 32 bit types.
 * DItype, UDItype -- Signed and unsigned 64 bit types.
 *
 * On a 32 bit machine UWtype should typically be USItype;
 * on a 64 bit machine, UWtype should typically be UDItype.
 */
37
/* Word-splitting helpers.  UWtype and W_TYPE_SIZE are supplied by the
 * file that includes this header. */

/* A quarter of the word width, in bits. */
#define __BITS4 (W_TYPE_SIZE / 4)

/* 1 shifted left by half the word width: the base when a word is viewed
 * as two half-word digits. */
#define __ll_B ((UWtype)1 << (W_TYPE_SIZE / 2))

/* The low half-word of T. */
#define __ll_lowpart(t) ((UWtype)(t) & (__ll_B - 1))

/* The high half-word of T, moved down to the low half. */
#define __ll_highpart(t) ((UWtype)(t) >> (W_TYPE_SIZE / 2))
42
/* Symbol-name wrapper that keeps different libraries using this file from
 * accidentally sharing symbols.  A user may predefine __MPN; otherwise
 * __MPN(x) pastes a "__" prefix onto x. */
#if !defined(__MPN)
#define __MPN(x) __##x
#endif
48
/* Define auxiliary asm macros.
 *
 * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
 * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
 * word product in HIGH_PROD and LOW_PROD.
 *
 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
 * UDWtype product. This is just a variant of umul_ppmm.
 *
 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 * denominator) divides a UDWtype, composed by the UWtype integers
 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
 * than DENOMINATOR for correct operation. If the implementation additionally
 * requires the most significant bit of DENOMINATOR to be 1, the
 * pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
 *
 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 * denominator). Like udiv_qrnnd but the numbers are signed. The quotient
 * is rounded towards 0.
 *
 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
 * msb to the first non-zero bit in the UWtype X. This is the number of
 * steps X needs to be shifted left to set the msb. Undefined for X == 0,
 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
 *
 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
 * from the least significant end.
 *
 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
 * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
 * by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
 * (i.e. carry out) is not stored anywhere, and is lost.
 *
 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 * composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
 * LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
 * and LOW_DIFFERENCE. Overflow (i.e. borrow out) is not stored anywhere,
 * and is lost.
 *
 * If any of these macros are left undefined for a particular CPU,
 * C macros are used. */
92
93/* The CPUs come in alphabetical order below.
94 *
95 * Please add support for more CPUs here, or improve the current support
96 * for the CPUs below! */
97
98#if defined(__GNUC__) && !defined(NO_ASM)
99
/* gcc 2 and later sometimes need a "cc" clobber on an asm statement, but
 * gcc 1 does not understand one.  These two helpers expand either to the
 * clobber (with the leading ':' or ',' it needs) or to nothing, so each
 * asm statement only has to be written once. */
#if __GNUC__ >= 2
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#else /* __GNUC__ < 2 */
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#endif /* __GNUC__ >= 2 */
109
110/***************************************
111    ************** A29K *****************
112    ***************************************/
/* AMD 29000 family, 32-bit words only. */
#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "add" on the low words, "addc" on the
 * high words. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("add %1,%4,%5\n" \
             "addc %0,%2,%3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%r" ((USItype)(ah)), "rI" ((USItype)(bh)), \
               "%r" ((USItype)(al)), "rI" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "sub" on the low words, "subc" on the
 * high words. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("sub %1,%4,%5\n" \
             "subc %0,%2,%3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "r" ((USItype)(ah)), "rI" ((USItype)(bh)), \
               "r" ((USItype)(al)), "rI" ((USItype)(bl)))

/* xl is produced by "multiplu" and xh by "multmu", both on the same pair
 * of factors; m0 and m1 are each evaluated once into a local. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
    USItype __fac0 = (m0); \
    USItype __fac1 = (m1); \
    __asm__ ("multiplu %0,%1,%2" \
             : "=r" ((USItype)(xl)) \
             : "r" (__fac0), "r" (__fac1)); \
    __asm__ ("multmu %0,%1,%2" \
             : "=r" ((USItype)(xh)) \
             : "r" (__fac0), "r" (__fac1)); \
} while (0)

/* "dividu" with n1 tied to the "q"-constrained output r, and n0 and d
 * passed in general registers. */
#define udiv_qrnnd(q, r, n1, n0, d) \
    __asm__ ("dividu %0,%3,%4" \
             : "=r" ((USItype)(q)), "=q" ((USItype)(r)) \
             : "1" ((USItype)(n1)), "r" ((USItype)(n0)), \
               "r" ((USItype)(d)))

#endif /* __a29k__ */
152
/* DEC Alpha, 64-bit words only. */
#if defined(__alpha) && W_TYPE_SIZE == 64

/* High word from __builtin_alpha_umulh, low word from an ordinary C
 * multiplication; m0 and m1 are each evaluated once. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
    UDItype __fac0 = (m0); \
    UDItype __fac1 = (m1); \
    (ph) = __builtin_alpha_umulh(__fac0, __fac1); \
    (pl) = __fac0 * __fac1; \
} while (0)
#define UMUL_TIME 46

#ifndef LONGLONG_STANDALONE
/* Out-of-line divide helper: as used below, its return value is the
 * quotient and the remainder is stored through the first argument. */
extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
    UDItype __rem; \
    (q) = __udiv_qrnnd(&__rem, (n1), (n0), (d)); \
    (r) = __rem; \
} while (0)
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */

#endif /* __alpha */
171
172/***************************************
173    ************** ARM ******************
174    ***************************************/
/* ARM, 32-bit words only. */
#if defined(__arm__) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "adds" on the low words, "adc" on the
 * high words. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("adds %1, %4, %5\n" \
             "adc %0, %2, %3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%r" ((USItype)(ah)), "rI" ((USItype)(bh)), \
               "%r" ((USItype)(al)), "rI" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "subs" on the low words, "sbc" on the
 * high words. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("subs %1, %4, %5\n" \
             "sbc %0, %2, %3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "r" ((USItype)(ah)), "rI" ((USItype)(bh)), \
               "r" ((USItype)(al)), "rI" ((USItype)(bl)))

#if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_3__)
/* Architecture 2/3 variant: the product is assembled from four "mul"
 * instructions on the 16-bit halves of a and b (see the per-line asm
 * comments), using r0-r2 as scratch registers. */
#define umul_ppmm(xh, xl, a, b) \
    __asm__ ("%@ Inlined umul_ppmm\n" \
             "mov %|r0, %2, lsr #16 @ AAAA\n" \
             "mov %|r2, %3, lsr #16 @ BBBB\n" \
             "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \
             "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \
             "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \
             "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \
             "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \
             "mul %0, %|r0, %0 @ AAAA * bbbb\n" \
             "adds %|r0, %1, %0 @ central sum\n" \
             "addcs %|r2, %|r2, #65536\n" \
             "adds %1, %|r1, %|r0, lsl #16\n" \
             "adc %0, %|r2, %|r0, lsr #16" \
             : "=&r" ((USItype)(xh)), "=r" ((USItype)(xl)) \
             : "r" ((USItype)(a)), "r" ((USItype)(b)) \
             : "r0", "r1", "r2")
#else
/* All other ARM architectures: a single "umull" instruction. */
#define umul_ppmm(xh, xl, a, b) \
    __asm__ ("%@ Inlined umul_ppmm\n" \
             "umull %r1, %r0, %r2, %r3" \
             : "=&r" ((USItype)(xh)), "=r" ((USItype)(xl)) \
             : "r" ((USItype)(a)), "r" ((USItype)(b)) \
             : "r0", "r1")
#endif

#define UMUL_TIME 20
#define UDIV_TIME 100

#endif /* __arm__ */
227
228/***************************************
229    ************** CLIPPER **************
230    ***************************************/
/* Clipper, 32-bit words only.  Each macro lets the multiply instruction
 * write a 64-bit value, which the *_ppmm forms then split through a union
 * whose struct lists the low word first. */
#if defined(__clipper__) && W_TYPE_SIZE == 32

/* Unsigned: (w1,w0) = u * v via "mulwux". */
#define umul_ppmm(w1, w0, u, v) \
    ({ \
        union { \
            UDItype __ll; \
            struct { USItype __l, __h; } __i; \
        } __prod; \
        __asm__ ("mulwux %2,%0" \
                 : "=r" (__prod.__ll) \
                 : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
        (w1) = __prod.__i.__h; \
        (w0) = __prod.__i.__l; \
    })

/* Signed: (w1,w0) = u * v via "mulwx". */
#define smul_ppmm(w1, w0, u, v) \
    ({ \
        union { \
            DItype __ll; \
            struct { SItype __l, __h; } __i; \
        } __prod; \
        __asm__ ("mulwx %2,%0" \
                 : "=r" (__prod.__ll) \
                 : "%0" ((SItype)(u)), "r" ((SItype)(v))); \
        (w1) = __prod.__i.__h; \
        (w0) = __prod.__i.__l; \
    })

/* Same "mulwux" instruction, with the UDItype result as the value of the
 * statement expression. */
#define __umulsidi3(u, v) \
    ({ \
        UDItype __prod; \
        __asm__ ("mulwux %2,%0" \
                 : "=r" (__prod) \
                 : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
        __prod; \
    })

#endif /* __clipper__ */
258
259/***************************************
260    ************** GMICRO ***************
261    ***************************************/
/* Gmicro, 32-bit words only. */
#if defined(__gmicro__) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "add.w" on the low words, "addx" on the
 * high words; ah and al are tied to the outputs. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("add.w %5,%1\n" \
             "addx %3,%0" \
             : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
             : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
               "%1" ((USItype)(al)), "g" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "sub.w" on the low words, "subx" on the
 * high words; ah and al are tied to the outputs. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("sub.w %5,%1\n" \
             "subx %3,%0" \
             : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
             : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
               "1" ((USItype)(al)), "g" ((USItype)(bl)))

/* Two-word product of m0 and m1 via "mulx". */
#define umul_ppmm(ph, pl, m0, m1) \
    __asm__ ("mulx %3,%0,%1" \
             : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \
             : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))

/* Two-word by one-word divide via "divx"; nh is tied to the r output and
 * nl to the q output. */
#define udiv_qrnnd(q, r, nh, nl, d) \
    __asm__ ("divx %4,%0,%1" \
             : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \
             : "1" ((USItype)(nh)), "0" ((USItype)(nl)), \
               "g" ((USItype)(d)))

#endif
295
296/***************************************
297    ************** HPPA *****************
298    ***************************************/
/* HP PA-RISC, 32-bit words only. */
#if defined(__hppa) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "add" on the low words, "addc" on the
 * high words. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("add %4,%5,%1\n" \
             "addc %2,%3,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%rM" ((USItype)(ah)), "rM" ((USItype)(bh)), \
               "%rM" ((USItype)(al)), "rM" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "sub" on the low words, "subb" on the
 * high words. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("sub %4,%5,%1\n" \
             "subb %2,%3,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "rM" ((USItype)(ah)), "rM" ((USItype)(bh)), \
               "rM" ((USItype)(al)), "rM" ((USItype)(bl)))

/* The "0 &&" keeps this branch permanently disabled, so only the timing
 * constants in the #else branch are ever defined. */
#if 0 && defined(_PA_RISC1_1)
/* xmpyu uses floating point register which is not allowed in Linux kernel. */
#define umul_ppmm(wh, wl, u, v) \
do { \
    union { \
        UDItype __ll; \
        struct { USItype __h, __l; } __i; \
    } __prod; \
    __asm__ ("xmpyu %1,%2,%0" \
             : "=*f" (__prod.__ll) \
             : "*f" ((USItype)(u)), "*f" ((USItype)(v))); \
    (wh) = __prod.__i.__h; \
    (wl) = __prod.__i.__l; \
} while (0)
#define UMUL_TIME 8
#define UDIV_TIME 60
#else
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif

/* Disabled as well: udiv_qrnnd through an external __udiv_qrnnd helper. */
#if 0 /* #ifndef LONGLONG_STANDALONE */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
    USItype __rem; \
    (q) = __udiv_qrnnd(&__rem, (n1), (n0), (d)); \
    (r) = __rem; \
} while (0)
extern USItype __udiv_qrnnd();
#endif /* LONGLONG_STANDALONE */

#endif /* hppa */
347
348/***************************************
349    ************** I370 *****************
350    ***************************************/
/* IBM System/370, 32-bit words only.  The unions below list the high word
 * first in their struct view. */
#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32

/* Unsigned product built on the "mr" instruction.  After the multiply,
 * xh gets a correction term: for each factor whose top bit is set (tested
 * with an arithmetic shift of the SItype view), the other factor is added
 * to the high word. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
    union { \
        UDItype __ll; \
        struct { USItype __h, __l; } __i; \
    } __prod; \
    USItype __fac0 = (m0); \
    USItype __fac1 = (m1); \
    __asm__ ("mr %0,%3" \
             : "=r" (__prod.__i.__h), "=r" (__prod.__i.__l) \
             : "%1" (__fac0), "r" (__fac1)); \
    (xh) = __prod.__i.__h; \
    (xl) = __prod.__i.__l; \
    (xh) += ((((SItype) __fac0 >> 31) & __fac1) \
             + (((SItype) __fac1 >> 31) & __fac0)); \
} while (0)

/* Signed product: the same "mr" instruction with no correction. */
#define smul_ppmm(xh, xl, m0, m1) \
do { \
    union { \
        DItype __ll; \
        struct { USItype __h, __l; } __i; \
    } __prod; \
    __asm__ ("mr %0,%3" \
             : "=r" (__prod.__i.__h), "=r" (__prod.__i.__l) \
             : "%1" (m0), "r" (m1)); \
    (xh) = __prod.__i.__h; \
    (xl) = __prod.__i.__l; \
} while (0)

/* Signed divide via "dr": the numerator (n1,n0) is packed into the union,
 * and afterwards the low word is taken as q and the high word as r. */
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
    union { \
        DItype __ll; \
        struct { USItype __h, __l; } __i; \
    } __num; \
    __num.__i.__h = (n1); \
    __num.__i.__l = (n0); \
    __asm__ ("dr %0,%2" \
             : "=r" (__num.__ll) \
             : "0" (__num.__ll), "r" (d)); \
    (q) = __num.__i.__l; \
    (r) = __num.__i.__h; \
} while (0)

#endif
391
392/***************************************
393    ************** I386 *****************
394    ***************************************/
/* Intel x86, 32-bit words only.  The #undef removes __i386__ before it is
 * tested, so these definitions are only compiled when __i486__ is
 * defined. */
#undef __i386__
#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "addl" on the low words, "adcl" on the
 * high words; ah and al are tied to the outputs. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("addl %5,%1\n" \
             "adcl %3,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
               "%1" ((USItype)(al)), "g" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "subl" on the low words, "sbbl" on the
 * high words; ah and al are tied to the outputs. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("subl %5,%1\n" \
             "sbbl %3,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
               "1" ((USItype)(al)), "g" ((USItype)(bl)))

/* "mull": u is tied to the "a"-constrained output w0, and w1 comes back
 * in the "d"-constrained output. */
#define umul_ppmm(w1, w0, u, v) \
    __asm__ ("mull %3" \
             : "=a" ((USItype)(w0)), "=d" ((USItype)(w1)) \
             : "%0" ((USItype)(u)), "rm" ((USItype)(v)))

/* "divl": n0 is tied to the "a" output (q) and n1 to the "d" output (r). */
#define udiv_qrnnd(q, r, n1, n0, d) \
    __asm__ ("divl %4" \
             : "=a" ((USItype)(q)), "=d" ((USItype)(r)) \
             : "0" ((USItype)(n0)), "1" ((USItype)(n1)), \
               "rm" ((USItype)(d)))

/* Timing constants, unless already defined. */
#if !defined(UMUL_TIME)
#define UMUL_TIME 40
#endif
#if !defined(UDIV_TIME)
#define UDIV_TIME 40
#endif

#endif /* 80x86 */
435
436/***************************************
437    ************** I860 *****************
438    ***************************************/
/* Intel i860, 32-bit words only. */
#if defined(__i860__) && W_TYPE_SIZE == 32
/* rshift_rhlc(r, h, l, c): issues "shr" on c followed by "shrd" on h and l,
 * writing the result to r.  NOTE(review): presumably a right shift of the
 * double word (h,l) by c bits -- confirm against the i860 manual.
 *
 * The ':' that starts the output-operand list was missing, which made the
 * "=r" string part of the asm template and left the operand list
 * malformed. */
#define rshift_rhlc(r, h, l, c) \
    __asm__ ("shr %3,r0,r0\n" \
             "shrd %1,%2,%0" \
             : "=r" (r) \
             : "r" (h), "r" (l), "rn" (c))
#endif /* i860 */
445
446/***************************************
447    ************** I960 *****************
448    ***************************************/
449#if defined(__i960__) && W_TYPE_SIZE == 32
/* (sh,sl) = (ah,al) + (bh,bl): "cmpo 1,0" followed by two "addc"
 * instructions, low words first. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("cmpo 1,0\n" \
             "addc %5,%4,%1\n" \
             "addc %3,%2,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%dI" ((USItype)(ah)), "dI" ((USItype)(bh)), \
               "%dI" ((USItype)(al)), "dI" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "cmpo 0,0" followed by two "subc"
 * instructions, low words first. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("cmpo 0,0\n" \
             "subc %5,%4,%1\n" \
             "subc %3,%2,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "dI" ((USItype)(ah)), "dI" ((USItype)(bh)), \
               "dI" ((USItype)(al)), "dI" ((USItype)(bl)))

/* (w1,w0) = u * v: "emul" writes a 64-bit value that is split through a
 * union whose struct lists the low word first. */
#define umul_ppmm(w1, w0, u, v) \
    ({ \
        union { \
            UDItype __ll; \
            struct { USItype __l, __h; } __i; \
        } __prod; \
        __asm__ ("emul %2,%1,%0" \
                 : "=d" (__prod.__ll) \
                 : "%dI" ((USItype)(u)), "dI" ((USItype)(v))); \
        (w1) = __prod.__i.__h; \
        (w0) = __prod.__i.__l; \
    })

/* Same "emul" instruction, with the UDItype result as the value of the
 * statement expression. */
#define __umulsidi3(u, v) \
    ({ \
        UDItype __prod; \
        __asm__ ("emul %2,%1,%0" \
                 : "=d" (__prod) \
                 : "%dI" ((USItype)(u)), "dI" ((USItype)(v))); \
        __prod; \
    })
/* udiv_qrnnd(q, r, nh, nl, d): divide the two-word value (nh,nl) by d with
 * "ediv".  The numerator is packed into __nn; the instruction's 64-bit
 * result lands in __rq, whose low word is taken as the remainder and whose
 * high word is taken as the quotient.
 *
 * Two defects are fixed here: __rq was used without being declared, and
 * the template named its operands "%d" and "%n", which are not operand
 * references.  They are now %2 (d) and %1 (the numerator), in the same
 * order the emul-based macros of this section use.
 * NOTE(review): confirm the "ediv src1,src2,dst" operand order against the
 * i960 manual. */
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
    union { \
        UDItype __ll; \
        struct { USItype __l, __h; } __i; \
    } __nn, __rq; \
    __nn.__i.__h = (nh); \
    __nn.__i.__l = (nl); \
    __asm__ ("ediv %2,%1,%0" \
             : "=d" (__rq.__ll) \
             : "dI" (__nn.__ll), "dI" ((USItype)(d))); \
    (r) = __rq.__i.__l; \
    (q) = __rq.__i.__h; \
} while (0)
#if defined(__i960mx) /* what is the proper symbol to test??? */
/* rshift_rhlc(r, h, l, c): packs (h,l) into a 64-bit union and issues
 * "shre" with c, writing the result to r.
 *
 * The body was a bare "do { ... }" with no "while (0)", so every expansion
 * was a syntax error; the loop is now closed.  As with the other
 * do/while(0) macros in this file, the caller supplies the trailing ';'. */
#define rshift_rhlc(r, h, l, c) \
do { \
    union { \
        UDItype __ll; \
        struct { USItype __l, __h; } __i; \
    } __nn; \
    __nn.__i.__h = (h); \
    __nn.__i.__l = (l); \
    __asm__ ("shre %2,%1,%0" \
             : "=d" (r) \
             : "dI" (__nn.__ll), "dI" (c)); \
} while (0)
#endif /* i960mx */
509#endif /* i960 */
510
511/***************************************
512    ************** 68000 ****************
513    ***************************************/
/* Motorola 680x0, 32-bit words only. */
#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "add" on the low words, "addx" on the
 * high words; ah and al are tied to the outputs. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("add%.l %5,%1\n" \
             "addx%.l %3,%0" \
             : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
             : "%0" ((USItype)(ah)), "d" ((USItype)(bh)), \
               "%1" ((USItype)(al)), "g" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "sub" on the low words, "subx" on the
 * high words; ah and al are tied to the outputs. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("sub%.l %5,%1\n" \
             "subx%.l %3,%0" \
             : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
             : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
               "1" ((USItype)(al)), "g" ((USItype)(bl)))

#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))

/* 68020 and up: one "mulu.l" into the register pair w1:w0. */
#define umul_ppmm(w1, w0, u, v) \
    __asm__ ("mulu%.l %3,%1:%0" \
             : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
             : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
#define UMUL_TIME 45

/* Unsigned divide: one "divu.l" on the pair r:q, preloaded with n1:n0. */
#define udiv_qrnnd(q, r, n1, n0, d) \
    __asm__ ("divu%.l %4,%1:%0" \
             : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
             : "0" ((USItype)(n0)), "1" ((USItype)(n1)), \
               "dmi" ((USItype)(d)))
#define UDIV_TIME 90

/* Signed counterpart using "divs.l". */
#define sdiv_qrnnd(q, r, n1, n0, d) \
    __asm__ ("divs%.l %4,%1:%0" \
             : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
             : "0" ((USItype)(n0)), "1" ((USItype)(n1)), \
               "dmi" ((USItype)(d)))

#else /* not mc68020 */

/* Without the 68020 instruction the product is assembled from four
 * "mulu" instructions on swapped half-words, using two scratch data
 * registers. */
#define umul_ppmm(xh, xl, a, b) \
do { \
    USItype __scratch_a, __scratch_b; \
    __asm__ ("| Inlined umul_ppmm\n" \
             "move%.l %5,%3\n" \
             "move%.l %2,%0\n" \
             "move%.w %3,%1\n" \
             "swap %3\n" \
             "swap %0\n" \
             "mulu %2,%1\n" \
             "mulu %3,%0\n" \
             "mulu %2,%3\n" \
             "swap %2\n" \
             "mulu %5,%2\n" \
             "add%.l %3,%2\n" \
             "jcc 1f\n" \
             "add%.l %#0x10000,%0\n" \
             "1: move%.l %2,%3\n" \
             "clr%.w %2\n" \
             "swap %2\n" \
             "swap %3\n" \
             "clr%.w %3\n" \
             "add%.l %3,%1\n" \
             "addx%.l %2,%0\n" \
             "| End inlined umul_ppmm" \
             : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
               "=d" (__scratch_a), "=&d" (__scratch_b) \
             : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
} while (0)
#define UMUL_TIME 100
#define UDIV_TIME 400

#endif /* not mc68020 */
#endif /* mc68000 */
589
590/***************************************
591    ************** 88000 ****************
592    ***************************************/
593#if defined(__m88000__) && W_TYPE_SIZE == 32
/* (sh,sl) = (ah,al) + (bh,bl): "addu.co" on the low words, "addu.ci" on
 * the high words. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("addu.co %1,%r4,%r5\n" \
             "addu.ci %0,%r2,%r3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%rJ" ((USItype)(ah)), "rJ" ((USItype)(bh)), \
               "%rJ" ((USItype)(al)), "rJ" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "subu.co" on the low words, "subu.ci" on
 * the high words. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("subu.co %1,%r4,%r5\n" \
             "subu.ci %0,%r2,%r3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "rJ" ((USItype)(ah)), "rJ" ((USItype)(bh)), \
               "rJ" ((USItype)(al)), "rJ" ((USItype)(bl)))
#if defined(__m88110__)

/* (wh,wl) = u * v: "mulu.d" writes a 64-bit value that is split through a
 * union whose struct lists the high word first. */
#define umul_ppmm(wh, wl, u, v) \
do { \
    union { \
        UDItype __ll; \
        struct { USItype __h, __l; } __i; \
    } __x; \
    __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
    (wh) = __x.__i.__h; \
    (wl) = __x.__i.__l; \
} while (0)

/* Divide (n1,n0) by d with "divu.d".  The low word of the instruction's
 * result is the quotient; the remainder is recomputed as n0 - q * d.
 *
 * The words of __q live in its __i member.  The original read "__q.__l",
 * which is not a member of the union and could never compile; it is now
 * __q.__i.__l. */
#define udiv_qrnnd(q, r, n1, n0, d) \
    ({ \
        union { \
            UDItype __ll; \
            struct { USItype __h, __l; } __i; \
        } __x, __q; \
        __x.__i.__h = (n1); \
        __x.__i.__l = (n0); \
        __asm__ ("divu.d %0,%1,%2" \
                 : "=r" (__q.__ll) \
                 : "r" (__x.__ll), "r" (d)); \
        (r) = (n0) - __q.__i.__l * (d); \
        (q) = __q.__i.__l; \
    })
#define UMUL_TIME 5
#define UDIV_TIME 25

#else

#define UMUL_TIME 17
#define UDIV_TIME 150

#endif /* __m88110__ */
636#endif /* __m88000__ */
637
638/***************************************
639    ************** MIPS *****************
640    ***************************************/
641#if defined(__mips__) && W_TYPE_SIZE == 32
/* Pick a umul_ppmm implementation by compiler version.
 *
 * GCC 4.4 and later: plain C.  The operands are widened to UDItype, then
 * the 64-bit product is split into w1 (high) and w0 (low).
 *
 * The version test used to be "__GNUC__ >= 4 && __GNUC_MINOR__ >= 4",
 * which is false for any later major release whose minor number is below
 * 4 (5.1, 11.2, ...).  Those compilers then fell through to the asm
 * variant with the "h" constraint, which GCC removed in 4.4.  Major and
 * minor are now tested separately.  The outputs are also parenthesized so
 * that any lvalue expression may be passed. */
#if (__GNUC__ >= 5) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#define umul_ppmm(w1, w0, u, v) \
do { \
    UDItype __ll = (UDItype)(u) * (v); \
    (w1) = __ll >> 32; \
    (w0) = __ll; \
} while (0)
#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
/* Older GCC with the "l"/"h" constraints: "multu" with both result
 * operands bound directly. */
#define umul_ppmm(w1, w0, u, v) \
    __asm__ ("multu %2,%3" \
             : "=l" ((USItype)(w0)), "=h" ((USItype)(w1)) \
             : "d" ((USItype)(u)), "d" ((USItype)(v)))
#else
/* Oldest compilers: "multu" followed by explicit mflo/mfhi. */
#define umul_ppmm(w1, w0, u, v) \
    __asm__ ("multu %2,%3\n" \
             "mflo %0\n" \
             "mfhi %1" \
             : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
             : "d" ((USItype)(u)), "d" ((USItype)(v)))
#endif
#define UMUL_TIME 10
#define UDIV_TIME 100
668#endif /* __mips__ */
669
670/***************************************
671    ************** MIPS/64 **************
672    ***************************************/
673#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
/* Pick a 64-bit umul_ppmm implementation by compiler version.
 *
 * GCC 4.4 and later: plain C on a 128-bit integer (mode TI).  The product
 * is split into w1 (high 64 bits) and w0 (low 64 bits).
 *
 * The version test used to be "__GNUC__ >= 4 && __GNUC_MINOR__ >= 4",
 * which is false for any later major release whose minor number is below
 * 4 (5.1, 11.2, ...).  Those compilers then fell through to the asm
 * variant with the "h" constraint, which GCC removed in 4.4.  Major and
 * minor are now tested separately.  The outputs are also parenthesized so
 * that any lvalue expression may be passed. */
#if (__GNUC__ >= 5) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#define umul_ppmm(w1, w0, u, v) \
do { \
    typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
    __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \
    (w1) = __ll >> 64; \
    (w0) = __ll; \
} while (0)
#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
/* Older GCC with the "l"/"h" constraints: "dmultu" with both result
 * operands bound directly. */
#define umul_ppmm(w1, w0, u, v) \
    __asm__ ("dmultu %2,%3" \
             : "=l" ((UDItype)(w0)), "=h" ((UDItype)(w1)) \
             : "d" ((UDItype)(u)), "d" ((UDItype)(v)))
#else
/* Oldest compilers: "dmultu" followed by explicit mflo/mfhi. */
#define umul_ppmm(w1, w0, u, v) \
    __asm__ ("dmultu %2,%3\n" \
             "mflo %0\n" \
             "mfhi %1" \
             : "=d" ((UDItype)(w0)), "=d" ((UDItype)(w1)) \
             : "d" ((UDItype)(u)), "d" ((UDItype)(v)))
#endif
#define UMUL_TIME 20
#define UDIV_TIME 140
701#endif /* __mips__ */
702
703/***************************************
704    ************** 32000 ****************
705    ***************************************/
/* National Semiconductor 32000, 32-bit words only.  The unions below list
 * the low word first in their struct view. */
#if defined(__ns32000__) && W_TYPE_SIZE == 32

/* (w1,w0) = u * v via "meid". */
#define umul_ppmm(w1, w0, u, v) \
    ({ \
        union { \
            UDItype __ll; \
            struct { USItype __l, __h; } __i; \
        } __prod; \
        __asm__ ("meid %2,%0" \
                 : "=g" (__prod.__ll) \
                 : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
        (w1) = __prod.__i.__h; \
        (w0) = __prod.__i.__l; \
    })

/* Same "meid" instruction, with the UDItype result as the value of the
 * statement expression. */
#define __umulsidi3(u, v) \
    ({ \
        UDItype __prod; \
        __asm__ ("meid %2,%0" \
                 : "=g" (__prod) \
                 : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
        __prod; \
    })

/* Divide (n1,n0) by d via "deid", in place on the packed numerator:
 * afterwards the low word is taken as r and the high word as q. */
#define udiv_qrnnd(q, r, n1, n0, d) \
    ({ \
        union { \
            UDItype __ll; \
            struct { USItype __l, __h; } __i; \
        } __num; \
        __num.__i.__h = (n1); \
        __num.__i.__l = (n0); \
        __asm__ ("deid %2,%0" \
                 : "=g" (__num.__ll) \
                 : "0" (__num.__ll), "g" ((USItype)(d))); \
        (r) = __num.__i.__l; \
        (q) = __num.__i.__h; \
    })

#endif /* __ns32000__ */
734
735/***************************************
736    ************** PPC ******************
737    ***************************************/
/* PowerPC and POWER, 32-bit words only.  Templates written as
 * "{old|new}" carry two alternative mnemonics for the same instruction. */
#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl).  When bh is a compile-time constant 0 or
 * all-ones, the high-word add uses the addze / addme form, which needs no
 * register for bh; otherwise the general adde form is used. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
    if (__builtin_constant_p(bh) && (bh) == 0) \
        __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "%r" ((USItype)(ah)), "%r" ((USItype)(al)), \
                   "rI" ((USItype)(bl))); \
    else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
        __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "%r" ((USItype)(ah)), "%r" ((USItype)(al)), \
                   "rI" ((USItype)(bl))); \
    else \
        __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "%r" ((USItype)(ah)), "r" ((USItype)(bh)), \
                   "%r" ((USItype)(al)), "rI" ((USItype)(bl))); \
} while (0)

/* (sh,sl) = (ah,al) - (bh,bl).  A compile-time constant 0 or all-ones in
 * ah or bh selects a shorter high-word instruction (subfze, subfme, addme
 * or addze); otherwise the general subfe form is used. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
    if (__builtin_constant_p(ah) && (ah) == 0) \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "r" ((USItype)(bh)), "rI" ((USItype)(al)), \
                   "r" ((USItype)(bl))); \
    else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "r" ((USItype)(bh)), "rI" ((USItype)(al)), \
                   "r" ((USItype)(bl))); \
    else if (__builtin_constant_p(bh) && (bh) == 0) \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "r" ((USItype)(ah)), "rI" ((USItype)(al)), \
                   "r" ((USItype)(bl))); \
    else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
        __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "r" ((USItype)(ah)), "rI" ((USItype)(al)), \
                   "r" ((USItype)(bl))); \
    else \
        __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
                 : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
                 : "r" ((USItype)(ah)), "r" ((USItype)(bh)), \
                   "rI" ((USItype)(al)), "r" ((USItype)(bl))); \
} while (0)

#if defined(_ARCH_PPC)

/* PowerPC: high word from "mulhwu", low word from a C multiply. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
    USItype __fac0 = (m0); \
    USItype __fac1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" \
             : "=r" ((USItype) ph) \
             : "%r" (__fac0), "r" (__fac1)); \
    (pl) = __fac0 * __fac1; \
} while (0)
#define UMUL_TIME 15

/* Signed variant: high word from "mulhw". */
#define smul_ppmm(ph, pl, m0, m1) \
do { \
    SItype __fac0 = (m0); \
    SItype __fac1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" \
             : "=r" ((SItype) ph) \
             : "%r" (__fac0), "r" (__fac1)); \
    (pl) = __fac0 * __fac1; \
} while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120

#else

/* POWER: a single "mul" produces both words.  xh then gets a correction
 * term: for each factor whose top bit is set (tested with an arithmetic
 * shift of the SItype view), the other factor is added to the high word. */
#define umul_ppmm(xh, xl, m0, m1) \
do { \
    USItype __fac0 = (m0); \
    USItype __fac1 = (m1); \
    __asm__ ("mul %0,%2,%3" \
             : "=r" ((USItype)(xh)), "=q" ((USItype)(xl)) \
             : "r" (__fac0), "r" (__fac1)); \
    (xh) += ((((SItype) __fac0 >> 31) & __fac1) \
             + (((SItype) __fac1 >> 31) & __fac0)); \
} while (0)
#define UMUL_TIME 8

/* Signed product: the same "mul" instruction with no correction. */
#define smul_ppmm(xh, xl, m0, m1) \
    __asm__ ("mul %0,%2,%3" \
             : "=r" ((SItype)(xh)), "=q" ((SItype)(xl)) \
             : "r" (m0), "r" (m1))
#define SMUL_TIME 4

/* Signed divide via "div"; nl is tied to the "q"-constrained output r. */
#define sdiv_qrnnd(q, r, nh, nl, d) \
    __asm__ ("div %0,%2,%4" \
             : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
             : "r" ((SItype)(nh)), "1" ((SItype)(nl)), \
               "r" ((SItype)(d)))
#define UDIV_TIME 100

#endif
#endif /* Power architecture variants. */
852
853/***************************************
854    ************** PYR ******************
855    ***************************************/
/* Pyramid, 32-bit words only. */
#if defined(__pyr__) && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "addw" on the low words, "addwc" on the
 * high words; ah and al are tied to the outputs. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("addw %5,%1\n" \
             "addwc %3,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
               "%1" ((USItype)(al)), "g" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "subw" on the low words, "subwb" on the
 * high words; ah and al are tied to the outputs. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("subw %5,%1\n" \
             "subwb %3,%0" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
               "1" ((USItype)(al)), "g" ((USItype)(bl)))

/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.
 * "movw" loads u and "uemul" multiplies by v; the 64-bit result is split
 * through a union whose struct lists the high word first. */
#define umul_ppmm(w1, w0, u, v) \
    ({ \
        union { \
            UDItype __ll; \
            struct { USItype __h, __l; } __i; \
        } __prod; \
        __asm__ ("movw %1,%R0\n" \
                 "uemul %2,%0" \
                 : "=&r" (__prod.__ll) \
                 : "g" ((USItype) (u)), "g" ((USItype)(v))); \
        (w1) = __prod.__i.__h; \
        (w0) = __prod.__i.__l; \
    })

#endif /* __pyr__ */
887
888/***************************************
889    ************** RT/ROMP **************
890    ***************************************/
/* IBM RT/ROMP, 32-bit words only. */
#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32

/* (sh,sl) = (ah,al) + (bh,bl): "a" on the low words, "ae" on the high
 * words; ah and al are tied to the outputs. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("a %1,%5\n" \
             "ae %0,%3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "%0" ((USItype)(ah)), "r" ((USItype)(bh)), \
               "%1" ((USItype)(al)), "r" ((USItype)(bl)))

/* (sh,sl) = (ah,al) - (bh,bl): "s" on the low words, "se" on the high
 * words; ah and al are tied to the outputs. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    __asm__ ("s %1,%5\n" \
             "se %0,%3" \
             : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
             : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
               "1" ((USItype)(al)), "r" ((USItype)(bl)))

/* Product by sixteen "m" steps on r2, with r10 loaded from the first
 * factor and read back as the low word.  ph then gets a correction term:
 * for each factor whose top bit is set (tested with an arithmetic shift
 * of the SItype view), the other factor is added to the high word. */
#define umul_ppmm(ph, pl, m0, m1) \
do { \
    USItype __fac0 = (m0); \
    USItype __fac1 = (m1); \
    __asm__ ("s r2,r2\n" \
             "mts r10,%2\n" \
             "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" \
             "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" \
             "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" \
             "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" "m r2,%3\n" \
             "cas %0,r2,r0\n" \
             "mfs r10,%1" \
             : "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \
             : "%r" (__fac0), "r" (__fac1) \
             : "r2"); \
    (ph) += ((((SItype) __fac0 >> 31) & __fac1) \
             + (((SItype) __fac1 >> 31) & __fac0)); \
} while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200

#endif /* RT/ROMP */
945
/***************************************
 **************  SH2  ******************
 ***************************************/
/*
 * SuperH (sh2/sh3/SH4), 32-bit words.
 *
 * umul_ppmm(w1, w0, u, v): (w1,w0) = u * v.  The product is produced by
 * dmulu.l and then copied out of the mach/macl registers, which are
 * therefore listed as clobbers.
 */
#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
	&& W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ( \
		"dmulu.l %2,%3\n" \
		"sts macl,%1\n" \
		"sts mach,%0" \
		: "=r" ((USItype)(w1)), \
		  "=r" ((USItype)(w0)) \
		: "r" ((USItype)(u)), \
		  "r" ((USItype)(v)) \
		: "macl", "mach")
#define UMUL_TIME 5
#endif

/***************************************
 **************  SPARC  ****************
 ***************************************/
/*
 * 32-bit SPARC.
 *
 *   add_ssaaaa / sub_ddmmss   two-word add and subtract (all variants)
 *   umul_ppmm(w1, w0, u, v)   (w1,w0) = u * v
 *   udiv_qrnnd(q, r, n1, n0, d)
 *                             q = (n1,n0) / d, r = (n1,n0) % d
 *
 * Three variants of multiply/divide are selected below:
 *   - __sparc_v8__:   umul/udiv instructions (udiv is skipped when
 *                     SUPERSPARC is defined, see the note there);
 *   - __sparclite__:  umul plus a 32-step divscc sequence;
 *   - otherwise (v7): a 32-step mulscc sequence and a software divide loop.
 */
#if defined(__sparc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addcc %r4,%5,%1\n" \
		 "addx %r2,%3,%0" \
		 : "=r" ((USItype)(sh)), \
		   "=&r" ((USItype)(sl)) \
		 : "%rJ" ((USItype)(ah)), \
		   "rI" ((USItype)(bh)), \
		   "%rJ" ((USItype)(al)), \
		   "rI" ((USItype)(bl)) \
		   __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subcc %r4,%5,%1\n" \
		 "subx %r2,%3,%0" \
		 : "=r" ((USItype)(sh)), \
		   "=&r" ((USItype)(sl)) \
		 : "rJ" ((USItype)(ah)), \
		   "rI" ((USItype)(bh)), \
		   "rJ" ((USItype)(al)), \
		   "rI" ((USItype)(bl)) \
		   __CLOBBER_CC)
#if defined(__sparc_v8__)
/* Don't match immediate range because, 1) it is not often useful,
   2) the 'I' flag thinks of the range as a 13 bit signed interval,
   while we want to match a 13 bit interval, sign extended to 32 bits,
   but INTERPRETED AS UNSIGNED.  */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
		 : "=r" ((USItype)(w1)), \
		   "=r" ((USItype)(w0)) \
		 : "r" ((USItype)(u)), \
		   "r" ((USItype)(v)))
#define UMUL_TIME 5
#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
/* Only the quotient comes out of the asm; the remainder is rebuilt in C
 * as n0 - q * d. */
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
	USItype __q; \
	__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
		 : "=r" ((USItype)(__q)) \
		 : "r" ((USItype)(n1)), \
		   "r" ((USItype)(n0)), \
		   "r" ((USItype)(d))); \
	(r) = (n0) - __q * (d); \
	(q) = __q; \
} while (0)
#define UDIV_TIME 25
#endif /* SUPERSPARC */
#else /* ! __sparc_v8__ */
#if defined(__sparclite__)
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc.  */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
		 : "=r" ((USItype)(w1)), \
		   "=r" ((USItype)(w0)) \
		 : "r" ((USItype)(u)), \
		   "r" ((USItype)(v)))
#define UMUL_TIME 5
/* 32 divscc steps through %g1 (listed as a clobber); the final step
 * writes the quotient operand and the remainder is read from %y. */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n" \
		 "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
		 "tst %%g0\n" \
		 "divscc %3,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%%g1\n" \
		 "divscc %%g1,%4,%0\n" \
		 "rd %%y,%1\n" \
		 "bl,a 1f\n" \
		 "add %1,%4,%1\n" \
		 "1: ! End of inline udiv_qrnnd" \
		 : "=r" ((USItype)(q)), \
		   "=r" ((USItype)(r)) \
		 : "r" ((USItype)(n1)), \
		   "r" ((USItype)(n0)), \
		   "rI" ((USItype)(d)) \
		 : "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
#endif /* __sparclite__ */
#endif /* __sparc_v8__ */
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
#ifndef umul_ppmm
/* 32 mulscc steps plus a final one with a zero operand; %g1 and %g2 are
 * scratch (listed as clobbers).  The low word is read from %y. */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("! Inlined umul_ppmm\n" \
		 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
		 "sra %3,31,%%g2 ! Don't move this insn\n" \
		 "and %2,%%g2,%%g2 ! Don't move this insn\n" \
		 "andcc %%g0,0,%%g1 ! Don't move this insn\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,%3,%%g1\n" \
		 "mulscc %%g1,0,%%g1\n" \
		 "add %%g1,%%g2,%0\n" \
		 "rd %%y,%1" \
		 : "=r" ((USItype)(w1)), \
		   "=r" ((USItype)(w0)) \
		 : "%rI" ((USItype)(u)), \
		   "r" ((USItype)(v)) \
		 : "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* Operand 0 (q) starts out holding n0 and operand 1 (r) starts out
 * holding n1 (matching constraints "0" and "1"); %g1 counts down from 32. */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n\t" \
		 "mov 32,%%g1\n\t" \
		 "subcc %1,%2,%%g0\n\t" \
		 "1: bcs 5f\n\t" \
		 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
		 "sub %1,%2,%1 ! this kills msb of n\n\t" \
		 "addx %1,%1,%1 ! so this can't give carry\n\t" \
		 "subcc %%g1,1,%%g1\n\t" \
		 "2: bne 1b\n\t" \
		 "subcc %1,%2,%%g0\n\t" \
		 "bcs 3f\n\t" \
		 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
		 "b 3f\n\t" \
		 "sub %1,%2,%1 ! this kills msb of n\n\t" \
		 "4: sub %1,%2,%1\n\t" \
		 "5: addxcc %1,%1,%1\n\t" \
		 "bcc 2b\n\t" \
		 "subcc %%g1,1,%%g1\n\t" \
		 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
		 "bne 4b\n\t" \
		 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
		 "sub %1,%2,%1\n\t" \
		 "3: xnor %0,0,%0\n\t" \
		 "! End of inline udiv_qrnnd\n" \
		 : "=&r" ((USItype)(q)), \
		   "=&r" ((USItype)(r)) \
		 : "r" ((USItype)(d)), \
		   "1" ((USItype)(n1)), \
		   "0" ((USItype)(n0)) : "%g1", "cc")
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
#endif
#endif /* __sparc__ */

/***************************************
 **************  VAX  ******************
 ***************************************/
/*
 * VAX, 32-bit words.
 *
 *   add_ssaaaa / sub_ddmmss   two-word add and subtract
 *   umul_ppmm(xh, xl, m0, m1) (xh,xl) = m0 * m1
 *   sdiv_qrnnd(q, r, n1, n0, d)
 *                             divide (n1,n0) by d with ediv, giving
 *                             quotient q and remainder r
 *
 * umul_ppmm reads the emul result back through a union (low word first)
 * and then adds m1 to the high word when m0 has its top bit set, and m0
 * when m1 has its top bit set.  That is the adjustment from the high word
 * of a signed product to that of the unsigned product (presumably emul is
 * a signed multiply -- confirm against the VAX architecture manual).
 */
#if defined(__vax__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl2 %5,%1\n" \
		 "adwc %3,%0" \
		 : "=g" ((USItype)(sh)), \
		   "=&g" ((USItype)(sl)) \
		 : "%0" ((USItype)(ah)), \
		   "g" ((USItype)(bh)), \
		   "%1" ((USItype)(al)), \
		   "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl2 %5,%1\n" \
		 "sbwc %3,%0" \
		 : "=g" ((USItype)(sh)), \
		   "=&g" ((USItype)(sl)) \
		 : "0" ((USItype)(ah)), \
		   "g" ((USItype)(bh)), \
		   "1" ((USItype)(al)), \
		   "g" ((USItype)(bl)))
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {UDItype __ll; \
	       struct {USItype __l, __h; } __i; \
	      } __xx; \
	USItype __m0 = (m0), __m1 = (m1); \
	__asm__ ("emul %1,%2,$0,%0" \
		 : "=g" (__xx.__ll) \
		 : "g" (__m0), \
		   "g" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((SItype) __m0 >> 31) & __m1) \
		 + (((SItype) __m1 >> 31) & __m0)); \
} while (0)
/* n1 and n0 are parenthesized so that expression arguments are assigned
 * as a whole. */
#define sdiv_qrnnd(q, r, n1, n0, d) \
do { \
	union {DItype __ll; \
	       struct {SItype __l, __h; } __i; \
	      } __xx; \
	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
	__asm__ ("ediv %3,%2,%0,%1" \
		 : "=g" (q), "=g" (r) \
		 : "g" (__xx.__ll), "g" (d)); \
} while (0)
#endif /* __vax__ */

/***************************************
 **************  Z8000  ****************
 ***************************************/
/*
 * Z8000, 16-bit words.
 *
 *   add_ssaaaa / sub_ddmmss   two-word add and subtract
 *   umul_ppmm(xh, xl, m0, m1) (xh,xl) = m0 * m1
 *
 * After the mult instruction umul_ppmm adds m1 to the high word when m0
 * has bit 15 set, and m0 when m1 has bit 15 set.  That is the adjustment
 * from the high word of a signed product to that of the unsigned product
 * (presumably mult is a signed multiply -- confirm against the Z8000
 * manual).
 */
#if defined(__z8000__) && W_TYPE_SIZE == 16
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
		 : "=r" ((unsigned int)(sh)), \
		   "=&r" ((unsigned int)(sl)) \
		 : "%0" ((unsigned int)(ah)), \
		   "r" ((unsigned int)(bh)), \
		   "%1" ((unsigned int)(al)), \
		   "rQR" ((unsigned int)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
		 : "=r" ((unsigned int)(sh)), \
		   "=&r" ((unsigned int)(sl)) \
		 : "0" ((unsigned int)(ah)), \
		   "r" ((unsigned int)(bh)), \
		   "1" ((unsigned int)(al)), \
		   "rQR" ((unsigned int)(bl)))
#define umul_ppmm(xh, xl, m0, m1) \
do { \
	union {long int __ll; \
	       struct {unsigned int __h, __l; } __i; \
	      } __xx; \
	unsigned int __m0 = (m0), __m1 = (m1); \
	__asm__ ("mult %S0,%H3" \
		 : "=r" (__xx.__i.__h), \
		   "=r" (__xx.__i.__l) \
		 : "%1" (__m0), \
		   "rQR" (__m1)); \
	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
	(xh) += ((((signed int) __m0 >> 15) & __m1) \
		 + (((signed int) __m1 >> 15) & __m0)); \
} while (0)
#endif /* __z8000__ */

1244#endif /* __GNUC__ */
1245
/***************************************
 ***********  Generic Versions  ********
 ***************************************/
/*
 * umul_ppmm(ph, pl, m0, m1): (ph,pl) = m0 * m1, built on __umulsidi3 when
 * that is available and no architecture-specific umul_ppmm was defined.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. "if (c) umul_ppmm(...); else ..."), and the
 * output arguments are parenthesized.
 */
#if !defined(umul_ppmm) && defined(__umulsidi3)
#define umul_ppmm(ph, pl, m0, m1) \
do { \
	UDWtype __ll = __umulsidi3(m0, m1); \
	(ph) = (UWtype) (__ll >> W_TYPE_SIZE); \
	(pl) = (UWtype) __ll; \
} while (0)
#endif

/*
 * __umulsidi3(u, v): the full double-word product u * v as a UDWtype
 * value, assembled from the two halves produced by umul_ppmm.  Only
 * defined when the platform did not already provide __umulsidi3.
 * Uses a GNU statement expression so it can be used as a value.
 */
#if !defined(__umulsidi3)
#define __umulsidi3(u, v) \
	({ UWtype __hi, __lo; \
	   umul_ppmm(__hi, __lo, (u), (v)); \
	   ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
#endif

/* If this machine has no inline assembler, use C macros. */

/*
 * add_ssaaaa(sh, sl, ah, al, bh, bl): (sh,sl) = (ah,al) + (bh,bl).
 *
 * The low words are added first; the sum wrapped around exactly when it
 * is smaller than one of the addends, and that carry is added into the
 * high word.  al is captured in a local so that it is evaluated once.
 */
#if !defined(add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
	UWtype __al = (al); \
	UWtype __x = __al + (bl); \
	(sh) = (ah) + (bh) + (__x < __al); \
	(sl) = __x; \
} while (0)
#endif

/*
 * sub_ddmmss(sh, sl, ah, al, bh, bl): (sh,sl) = (ah,al) - (bh,bl).
 *
 * The low words are subtracted first; a borrow occurred exactly when the
 * difference is larger than the minuend, and that borrow is taken from
 * the high word.  al is captured in a local so that it is evaluated once.
 */
#if !defined(sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
	UWtype __al = (al); \
	UWtype __x = __al - (bl); \
	(sh) = (ah) - (bh) - (__x > __al); \
	(sl) = __x; \
} while (0)
#endif

/*
 * umul_ppmm(w1, w0, u, v): (w1,w0) = u * v for unsigned words, in plain C.
 *
 * Both operands are split into half-words, the four partial products are
 * formed, and the two middle products are folded into the result with an
 * explicit carry check.
 */
#if !defined(umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
do { \
	UWtype __x0, __x1, __x2, __x3; \
	UHWtype __ul, __vl, __uh, __vh; \
	UWtype __u = (u), __v = (v); \
	\
	__ul = __ll_lowpart(__u); \
	__uh = __ll_highpart(__u); \
	__vl = __ll_lowpart(__v); \
	__vh = __ll_highpart(__v); \
	\
	__x0 = (UWtype) __ul * __vl; \
	__x1 = (UWtype) __ul * __vh; \
	__x2 = (UWtype) __uh * __vl; \
	__x3 = (UWtype) __uh * __vh; \
	\
	__x1 += __ll_highpart(__x0); /* this can't give carry */ \
	__x1 += __x2; /* but this indeed can */ \
	if (__x1 < __x2) { /* did we get it? */ \
		__x3 += __ll_B; /* yes, add it in the proper pos. */ \
	} \
	\
	(w1) = __x3 + __ll_highpart(__x1); \
	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
} while (0)
#endif

/*
 * smul_ppmm(w1, w0, u, v): (w1,w0) = u * v with u and v treated as signed
 * words, derived from the unsigned product: the high word is reduced by v
 * when u has its top bit set and by u when v has its top bit set.
 *
 * The guard tests smul_ppmm itself.  Testing umul_ppmm here would never
 * succeed, because umul_ppmm is always defined by this point, and the
 * macro would never be provided.
 */
#if !defined(smul_ppmm)
#define smul_ppmm(w1, w0, u, v) \
do { \
	UWtype __w1; \
	UWtype __m0 = (u), __m1 = (v); \
	umul_ppmm(__w1, w0, __m0, __m1); \
	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
		- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
} while (0)
#endif

/* Define this unconditionally, so it can be used for debugging. */
/*
 * __udiv_qrnnd_c(q, r, n1, n0, d): divide the two-word value (n1,n0) by d
 * in plain C, storing the quotient in q and the remainder in r.
 *
 * The quotient is produced one half-word at a time: each step divides by
 * the high half of d to get an estimate, then corrects the estimate
 * downwards (at most twice) using the low half of d.  The ">= (d)" test
 * skips the second correction when adding d wrapped around.
 *
 * NOTE(review): the file sets UDIV_NEEDS_NORMALIZATION to 1 when this
 * macro is used as udiv_qrnnd, so d is presumably expected to have its
 * top bit set, and n1 is presumably expected to be smaller than d --
 * confirm against the callers.  d is evaluated several times.
 */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \
	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
	__d1 = __ll_highpart(d); \
	__d0 = __ll_lowpart(d); \
	\
	/* High half of the quotient. */ \
	__r1 = (n1) % __d1; \
	__q1 = (n1) / __d1; \
	__m = (UWtype) __q1 * __d0; \
	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
	if (__r1 < __m) { \
		__q1--, __r1 += (d); \
		/* i.e. we didn't get carry when adding to __r1 */ \
		if (__r1 >= (d) && __r1 < __m) { \
			__q1--, __r1 += (d); \
		} \
	} \
	__r1 -= __m; \
	\
	/* Low half of the quotient. */ \
	__r0 = __r1 % __d1; \
	__q0 = __r1 / __d1; \
	__m = (UWtype) __q0 * __d0; \
	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
	if (__r0 < __m) { \
		__q0--, __r0 += (d); \
		if (__r0 >= (d) && __r0 < __m) { \
			__q0--, __r0 += (d); \
		} \
	} \
	__r0 -= __m; \
	\
	(q) = (UWtype) __q1 * __ll_B | __q0; \
	(r) = __r0; \
} while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
/*
 * udiv_qrnnd(q, r, nh, nl, d): the helper returns the quotient and stores
 * the remainder through its first argument; a local is used for that so
 * that r does not have to be an addressable object.
 */
#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	UWtype __r; \
	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
	(r) = __r; \
} while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
/* The C fallback is flagged through UDIV_NEEDS_NORMALIZATION = 1 so that
 * users of udiv_qrnnd can tell that the divisor has to be normalized. */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Default: udiv_qrnnd does not require a normalized divisor unless one of
 * the definitions above said otherwise. */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif


Archive Download this file



interactive