Root/arch/sparc/lib/rem.S

1/*
2 * rem.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6
7/* This file is generated from divrem.m4; DO NOT EDIT! */
8/*
9 * Division and remainder, from Appendix E of the Sparc Version 8
10 * Architecture Manual, with fixes from Gordon Irlam.
11 */
12
13/*
14 * Input: dividend and divisor in %o0 and %o1 respectively.
15 *
16 * m4 parameters (names were replaced by their values during generation):
17 * .rem => name of the generated function
18 * rem  => operation selected: div yields %o0 / %o1; rem yields %o0 % %o1
19 * true => signedness selected: true is signed; false is unsigned
20 *
21 * Algorithm parameters:
22 * N how many bits per iteration we try to get (4)
23 * WORDSIZE total number of bits (32)
24 *
25 * Derived constants:
26 * TOPBITS number of bits in the top decade of a number
27 *
28 * Important variables:
29 * Q the partial quotient under development (initially 0)
30 * R the remainder so far, initially the dividend
31 * ITER number of main division loop iterations required;
32 * equal to ceil(log2(quotient) / N). Note that this
33 * is the log base (2^N) of the quotient.
34 * V the current comparand, initially divisor*2^(ITER*N-1)
35 *
36 * Cost:
37 * Current estimate for non-large dividend is
38 * ceil(log2(quotient) / N) * (10 + 7N/2) + C
39 * A large dividend is one greater than 2^(31-TOPBITS) and takes a
40 * different path, as the upper bits of the quotient must be developed
41 * one bit at a time.
42 */
43
44
/*
 * .rem / _Rem: signed 32-bit remainder computed in software.
 *
 * In:   %o0 = dividend, %o1 = divisor
 * Out:  %o0 = remainder; it is negated when the original dividend was
 *       negative.  If the divisor is zero, ST_DIV0 is raised and, should
 *       the trap return, the routine returns 0.
 *
 * Leaf routine (returns with retl, no register window is opened).
 * Registers written: %o0-%o5, %g1, %g2, %g7 and the integer condition codes.
 *
 * Working registers:
 *   %g2  copy of the original dividend; only its sign is used, at the end
 *   %o3  running remainder, starts as the dividend after sign removal
 *   %o5  comparand, starts as the divisor after sign removal
 *   %o2  quotient under development (never returned by this routine)
 *   %o4  number of 4-bit passes through Ldivloop
 *   %g7  number of single-bit steps on the large-dividend path
 *   %g1  the constant 1 << (32 - 4 - 1)
 */
45    .globl .rem
46    .globl _Rem
47.rem:
48_Rem: /* needed for export */
49    ! compute sign of result; if neither is negative, no problem
50    orcc %o1, %o0, %g0 ! either negative?
51    bge 2f ! no, go do the divide
52     mov %o0, %g2 ! compute sign in any case
53
54    tst %o1
55    bge 1f
56     tst %o0
57    ! %o1 is definitely negative; %o0 might also be negative
58    bge 2f ! if %o0 not negative...
59     sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
601: ! %o0 is negative, %o1 is nonnegative
61    sub %g0, %o0, %o0 ! make %o0 nonnegative
622:
63
64    ! Ready to divide. Compute size of quotient; scale comparand.
/* %o5 = divisor (orcc also tests it for zero); the delay slot copies the
 * dividend into %o3 whether or not the branch is taken. */
65    orcc %o1, %g0, %o5
66    bne 1f
67     mov %o0, %o3
68
69        ! Divide by zero trap. If it returns, return 0 (about as
70        ! wrong as possible, but that is what SunOS does...).
71        ta ST_DIV0
72        retl
73         clr %o0
74
751:
/* Unsigned dividend < divisor: %o3 already holds the remainder.
 * The delay slot clears the quotient register %o2 on both paths. */
76    cmp %o3, %o5 ! if %o1 exceeds %o0, done
77    blu Lgot_result ! (and algorithm fails otherwise)
78     clr %o2
79
/* %g1 = 1 << 27 (the low bits of the constant are zero, so sethi alone
 * loads it). Dividends below that value skip the one-bit-at-a-time path.
 * The delay slot clears the iteration counter %o4 on both paths. */
80    sethi %hi(1 << (32 - 4 - 1)), %g1
81
82    cmp %o3, %g1
83    blu Lnot_really_big
84     clr %o4
85
86    ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
87    ! as our usual N-at-a-shot divide step will cause overflow and havoc.
88    ! The number of bits in the result here is N*ITER+SC, where SC <= N.
89    ! Compute ITER in an unorthodox manner: know we need to shift V into
90    ! the top decade: so do not even bother to compare to R.
/* Shift %o5 left 4 bits per pass until it is >= %g1, counting the passes
 * in %o4. %g7 is (re)set to 1 in the delay slot on every pass. */
91    1:
92        cmp %o5, %g1
93        bgeu 3f
94         mov 1, %g7
95
96        sll %o5, 4, %o5
97
98        b 1b
99         add %o4, 1, %o4
100
101    ! Now compute %g7.
/* Double %o5 while it is still below %o3; %g7 is incremented in the delay
 * slot for each doubling. A carry out of addcc means the doubling
 * overflowed 32 bits and is undone below. */
102    2:
103        addcc %o5, %o5, %o5
104
105        bcc Lnot_too_big
106         add %g7, 1, %g7
107
108        ! We get here if the %o1 overflowed while shifting.
109        ! This means that %o3 has the high-order bit set.
110        ! Restore %o5 and subtract from %o3.
/* The register that was doubled and is restored here is %o5.
 * %g1 << 4 is 1 << 31, i.e. the bit that was carried out. */
111        sll %g1, 4, %g1 ! high order bit
112        srl %o5, 1, %o5 ! rest of %o5
113        add %o5, %g1, %o5
114
115        b Ldo_single_div
116         sub %g7, 1, %g7
117
118    Lnot_too_big:
119    3:
120        cmp %o5, %o3
121        blu 2b
122         nop
123
/* Both the taken branch and the fall-through reach Ldo_single_div; the
 * back-up step that used to sit between them is commented out below. */
124        be Ldo_single_div
125         nop
126    /* NB: these are commented out in the V8-Sparc manual as well */
127    /* (I do not understand this) */
128    ! %o5 > %o3: went too far: back up 1 step
129    ! srl %o5, 1, %o5
130    ! dec %g7
131    ! do single-bit divide steps
132    !
133    ! We have to be careful here. We know that %o3 >= %o5, so we can do the
134    ! first divide step without thinking. BUT, the others are conditional,
135    ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
136    ! order bit set in the first step, just falling into the regular
137    ! division loop will mess up the first time around.
138    ! So we unroll slightly...
/* If %g7 goes negative on this decrement there are no single-bit steps to
 * do. Otherwise perform the first step unconditionally: subtract once and
 * start the quotient at 1. */
139    Ldo_single_div:
140        subcc %g7, 1, %g7
141        bl Lend_regular_divide
142         nop
143
144        sub %o3, %o5, %o3
145        mov 1, %o2
146
147        b Lend_single_divloop
148         nop
/* One quotient bit per pass. The condition codes tested by bl come from the
 * tst %o3 in the delay slot at the bottom of the loop; the comparand is
 * halved in the delay slot on both paths. */
149    Lsingle_divloop:
150        sll %o2, 1, %o2
151
152        bl 1f
153         srl %o5, 1, %o5
154        ! %o3 >= 0
155        sub %o3, %o5, %o3
156
157        b 2f
158         add %o2, 1, %o2
159    1: ! %o3 < 0
160        add %o3, %o5, %o3
161        sub %o2, 1, %o2
162    2:
163    Lend_single_divloop:
164        subcc %g7, 1, %g7
165        bge Lsingle_divloop
166         tst %o3
167
/* Single-bit steps are finished; continue with the %o4 four-bit passes
 * counted earlier. b,a annuls its delay slot. */
168        b,a Lend_regular_divide
169
/* Small-dividend path: shift %o5 left 4 bits per pass while it is <= %o3
 * (unsigned), counting passes in %o4 with addcc in the delay slot. */
170Lnot_really_big:
1711:
172    sll %o5, 4, %o5
173    cmp %o5, %o3
174    bleu 1b
175     addcc %o4, 1, %o4
/* be tests the Z flag left by the last addcc above, so it is taken only if
 * that add produced zero. The delay slot always takes one off the count. */
176    be Lgot_result
177     sub %o4, 1, %o4
178
179    tst %o3 ! set up for initial iteration
/*
 * Main loop: each pass develops 4 quotient bits by non-restoring division.
 * The unrolled tree follows the sign of %o3 after every step: a
 * non-negative remainder is followed by a subtract, a negative one by an
 * add, and the comparand %o5 is halved in every branch delay slot.
 * Labels are named L.<depth>.<16 + accumulated bits> and are entered when
 * the remainder is negative at that depth. Every leaf adds
 * (accumulated bits * 2 +/- 1) to %o2 in the delay slot of its "b 9f".
 */
180Ldivloop:
181    sll %o2, 4, %o2
182        ! depth 1, accumulated bits 0
183    bl L.1.16
184     srl %o5,1,%o5
185    ! remainder is positive
186    subcc %o3,%o5,%o3
187            ! depth 2, accumulated bits 1
188    bl L.2.17
189     srl %o5,1,%o5
190    ! remainder is positive
191    subcc %o3,%o5,%o3
192            ! depth 3, accumulated bits 3
193    bl L.3.19
194     srl %o5,1,%o5
195    ! remainder is positive
196    subcc %o3,%o5,%o3
197            ! depth 4, accumulated bits 7
198    bl L.4.23
199     srl %o5,1,%o5
200    ! remainder is positive
201    subcc %o3,%o5,%o3
202
203    b 9f
204     add %o2, (7*2+1), %o2
205    
206L.4.23:
207    ! remainder is negative
208    addcc %o3,%o5,%o3
209    b 9f
210     add %o2, (7*2-1), %o2
211    
212L.3.19:
213    ! remainder is negative
214    addcc %o3,%o5,%o3
215            ! depth 4, accumulated bits 5
216    bl L.4.21
217     srl %o5,1,%o5
218    ! remainder is positive
219    subcc %o3,%o5,%o3
220    b 9f
221     add %o2, (5*2+1), %o2
222    
223L.4.21:
224    ! remainder is negative
225    addcc %o3,%o5,%o3
226    b 9f
227     add %o2, (5*2-1), %o2
228    
229L.2.17:
230    ! remainder is negative
231    addcc %o3,%o5,%o3
232            ! depth 3, accumulated bits 1
233    bl L.3.17
234     srl %o5,1,%o5
235    ! remainder is positive
236    subcc %o3,%o5,%o3
237            ! depth 4, accumulated bits 3
238    bl L.4.19
239     srl %o5,1,%o5
240    ! remainder is positive
241    subcc %o3,%o5,%o3
242    b 9f
243     add %o2, (3*2+1), %o2
244
245L.4.19:
246    ! remainder is negative
247    addcc %o3,%o5,%o3
248    b 9f
249     add %o2, (3*2-1), %o2
250
251L.3.17:
252    ! remainder is negative
253    addcc %o3,%o5,%o3
254            ! depth 4, accumulated bits 1
255    bl L.4.17
256     srl %o5,1,%o5
257    ! remainder is positive
258    subcc %o3,%o5,%o3
259    b 9f
260     add %o2, (1*2+1), %o2
261
262L.4.17:
263    ! remainder is negative
264    addcc %o3,%o5,%o3
265    b 9f
266     add %o2, (1*2-1), %o2
267
268L.1.16:
269    ! remainder is negative
270    addcc %o3,%o5,%o3
271            ! depth 2, accumulated bits -1
272    bl L.2.15
273     srl %o5,1,%o5
274    ! remainder is positive
275    subcc %o3,%o5,%o3
276            ! depth 3, accumulated bits -1
277    bl L.3.15
278     srl %o5,1,%o5
279    ! remainder is positive
280    subcc %o3,%o5,%o3
281            ! depth 4, accumulated bits -1
282    bl L.4.15
283     srl %o5,1,%o5
284    ! remainder is positive
285    subcc %o3,%o5,%o3
286    b 9f
287     add %o2, (-1*2+1), %o2
288
289L.4.15:
290    ! remainder is negative
291    addcc %o3,%o5,%o3
292    b 9f
293     add %o2, (-1*2-1), %o2
294
295L.3.15:
296    ! remainder is negative
297    addcc %o3,%o5,%o3
298            ! depth 4, accumulated bits -3
299    bl L.4.13
300     srl %o5,1,%o5
301    ! remainder is positive
302    subcc %o3,%o5,%o3
303    b 9f
304     add %o2, (-3*2+1), %o2
305
306L.4.13:
307    ! remainder is negative
308    addcc %o3,%o5,%o3
309    b 9f
310     add %o2, (-3*2-1), %o2
311
312L.2.15:
313    ! remainder is negative
314    addcc %o3,%o5,%o3
315            ! depth 3, accumulated bits -3
316    bl L.3.13
317     srl %o5,1,%o5
318    ! remainder is positive
319    subcc %o3,%o5,%o3
320            ! depth 4, accumulated bits -5
321    bl L.4.11
322     srl %o5,1,%o5
323    ! remainder is positive
324    subcc %o3,%o5,%o3
325    b 9f
326     add %o2, (-5*2+1), %o2
327
328L.4.11:
329    ! remainder is negative
330    addcc %o3,%o5,%o3
331    b 9f
332     add %o2, (-5*2-1), %o2
333
334
335L.3.13:
336    ! remainder is negative
337    addcc %o3,%o5,%o3
338            ! depth 4, accumulated bits -7
339    bl L.4.9
340     srl %o5,1,%o5
341    ! remainder is positive
342    subcc %o3,%o5,%o3
343    b 9f
344     add %o2, (-7*2+1), %o2
345
346L.4.9:
347    ! remainder is negative
348    addcc %o3,%o5,%o3
349    b 9f
350     add %o2, (-7*2-1), %o2
351
/* Loop control: run another 4-bit pass while the decremented %o4 is still
 * >= 0. The tst in the delay slot sets the condition codes from %o3 both
 * for the next pass and for the final fixup test below. */
352    9:
353Lend_regular_divide:
354    subcc %o4, 1, %o4
355    bge Ldivloop
356     tst %o3
357
/* bl,a executes the add in its delay slot only when the branch is taken,
 * i.e. only when the remainder is negative; %o1 holds the divisor after
 * sign removal. Both outcomes continue at Lgot_result. */
358    bl,a Lgot_result
359    ! non-restoring fixup here (one instruction only!)
360    add %o3, %o1, %o3
361
362Lgot_result:
363    ! check to see if answer should be < 0
/* %g2 is the original dividend; the annulled delay slot negates %o3 only
 * when it was negative. The result is then moved to %o0 in the retl
 * delay slot. */
364    tst %g2
365    bl,a 1f
366     sub %g0, %o3, %o3
3671:
368    retl
369     mov %o3, %o0
370
/*
 * .rem_patch: signed 32-bit remainder using the sdivcc and smul
 * instructions instead of the software loop.
 *
 * In:   %o0 = dividend, %o1 = divisor
 * Out:  %o0 = %o0 - (quotient * %o1)
 *
 * Leaf routine (returns with retl). Writes %o0, %o2, %o4, %y and the
 * integer condition codes.
 *
 * NOTE(review): presumably this sequence is copied over .rem on CPUs that
 * implement hardware divide, which would make its exact length and layout
 * significant -- confirm against the patching code, which is not visible
 * in this file.
 */
371    .globl .rem_patch
372.rem_patch:
/* %o4 = dividend sign replicated into all 32 bits; it is written to %y. */
373    sra %o0, 0x1f, %o4
374    wr %o4, 0x0, %y
/* NOTE(review): the three nops presumably cover the delayed write to %y
 * before sdivcc reads it -- confirm against the SPARC V8 manual. */
375    nop
376    nop
377    nop
378    sdivcc %o0, %o1, %o2
/* bvs,a: if sdivcc set the overflow flag, the delay slot runs and
 * complements every bit of the quotient; otherwise it is annulled. */
379    bvs,a 1f
380     xnor %o2, %g0, %o2
/* %o2 = quotient * divisor; the subtraction in the retl delay slot leaves
 * the remainder in %o0. */
3811: smul %o2, %o1, %o2
382    retl
383     sub %o0, %o2, %o0
/* Trailing nop after the return's delay slot. */
384    nop
385

Archive Download this file



interactive