Root/package/px5g/src/polarssl/bn_mul.h

/**
 * \file bn_mul.h
 *
 * Based on XySSL: Copyright (C) 2006-2008 Christophe Devine
 *
 * Copyright (C) 2009 Paul Bakker <polarssl_maintainer at polarssl dot org>
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the names of PolarSSL or XySSL nor the names of its contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Multiply source vector [s] with b, add result
 * to destination vector [d] and set carry c.
 *
 * Currently supports:
 *
 *    . IA-32 (386+)         . AMD64 / EM64T
 *    . IA-32 (SSE2)         . Motorola 68000
 *    . PowerPC, 32-bit      . MicroBlaze
 *    . PowerPC, 64-bit      . TriCore
 *    . SPARC v8             . ARM v3+
 *    . Alpha                . MIPS32
 *    . C, longlong          . C, generic
 */
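/*
 * Illustrative usage sketch (not part of the original header): roughly how
 * the bignum code is expected to drive these macros.  The helper name
 * mpi_mul_hlp, the unrolling shown and the t_int/t_dbl limb types are
 * assumptions based on the surrounding PolarSSL sources, not definitions
 * made by this file.
 *
 *    static void mpi_mul_hlp( int i, t_int *s, t_int *d, t_int b )
 *    {
 *        t_int c = 0, t = 0;      // c: running carry; t: scratch used by
 *                                 // the i386 variant to save ebx
 *    #if defined(MULADDC_HUIT)
 *        for( ; i >= 8; i -= 8 )
 *        {
 *            MULADDC_INIT         // load s, d, c, b into registers
 *            MULADDC_HUIT         // eight limbs of d[] += s[] * b
 *            MULADDC_STOP         // write back s, d and the carry c
 *        }
 *    #endif
 *        for( ; i > 0; i-- )
 *        {
 *            MULADDC_INIT
 *            MULADDC_CORE         // one limb: *d += *s * b, carry in c
 *            MULADDC_STOP
 *        }
 *
 *        do                       // propagate the final carry into d
 *        {
 *            *d += c; c = ( *d < c ); d++;
 *        }
 *        while( c != 0 );
 *    }
 */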
#ifndef POLARSSL_BN_MUL_H
#define POLARSSL_BN_MUL_H

#include "polarssl/config.h"

#if defined(POLARSSL_HAVE_ASM)

#if defined(__GNUC__)
#if defined(__i386__)

#define MULADDC_INIT \
    asm( "movl %%ebx, %0 " : "=m" (t)); \
    asm( "movl %0, %%esi " :: "m" (s)); \
    asm( "movl %0, %%edi " :: "m" (d)); \
    asm( "movl %0, %%ecx " :: "m" (c)); \
    asm( "movl %0, %%ebx " :: "m" (b));

#define MULADDC_CORE \
    asm( "lodsl " ); \
    asm( "mull %ebx " ); \
    asm( "addl %ecx, %eax " ); \
    asm( "adcl $0, %edx " ); \
    asm( "addl (%edi), %eax " ); \
    asm( "adcl $0, %edx " ); \
    asm( "movl %edx, %ecx " ); \
    asm( "stosl " );

#if defined(POLARSSL_HAVE_SSE2)

#define MULADDC_HUIT \
    asm( "movd %ecx, %mm1 " ); \
    asm( "movd %ebx, %mm0 " ); \
    asm( "movd (%edi), %mm3 " ); \
    asm( "paddq %mm3, %mm1 " ); \
    asm( "movd (%esi), %mm2 " ); \
    asm( "pmuludq %mm0, %mm2 " ); \
    asm( "movd 4(%esi), %mm4 " ); \
    asm( "pmuludq %mm0, %mm4 " ); \
    asm( "movd 8(%esi), %mm6 " ); \
    asm( "pmuludq %mm0, %mm6 " ); \
    asm( "movd 12(%esi), %mm7 " ); \
    asm( "pmuludq %mm0, %mm7 " ); \
    asm( "paddq %mm2, %mm1 " ); \
    asm( "movd 4(%edi), %mm3 " ); \
    asm( "paddq %mm4, %mm3 " ); \
    asm( "movd 8(%edi), %mm5 " ); \
    asm( "paddq %mm6, %mm5 " ); \
    asm( "movd 12(%edi), %mm4 " ); \
    asm( "paddq %mm4, %mm7 " ); \
    asm( "movd %mm1, (%edi) " ); \
    asm( "movd 16(%esi), %mm2 " ); \
    asm( "pmuludq %mm0, %mm2 " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "movd 20(%esi), %mm4 " ); \
    asm( "pmuludq %mm0, %mm4 " ); \
    asm( "paddq %mm3, %mm1 " ); \
    asm( "movd 24(%esi), %mm6 " ); \
    asm( "pmuludq %mm0, %mm6 " ); \
    asm( "movd %mm1, 4(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "movd 28(%esi), %mm3 " ); \
    asm( "pmuludq %mm0, %mm3 " ); \
    asm( "paddq %mm5, %mm1 " ); \
    asm( "movd 16(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm2 " ); \
    asm( "movd %mm1, 8(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm7, %mm1 " ); \
    asm( "movd 20(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm4 " ); \
    asm( "movd %mm1, 12(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm2, %mm1 " ); \
    asm( "movd 24(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm6 " ); \
    asm( "movd %mm1, 16(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm4, %mm1 " ); \
    asm( "movd 28(%edi), %mm5 " ); \
    asm( "paddq %mm5, %mm3 " ); \
    asm( "movd %mm1, 20(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm6, %mm1 " ); \
    asm( "movd %mm1, 24(%edi) " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "paddq %mm3, %mm1 " ); \
    asm( "movd %mm1, 28(%edi) " ); \
    asm( "addl $32, %edi " ); \
    asm( "addl $32, %esi " ); \
    asm( "psrlq $32, %mm1 " ); \
    asm( "movd %mm1, %ecx " );

#define MULADDC_STOP \
    asm( "emms " ); \
    asm( "movl %0, %%ebx " :: "m" (t)); \
    asm( "movl %%ecx, %0 " : "=m" (c)); \
    asm( "movl %%edi, %0 " : "=m" (d)); \
    asm( "movl %%esi, %0 " : "=m" (s) :: \
    "eax", "ecx", "edx", "esi", "edi" );

#else

#define MULADDC_STOP \
    asm( "movl %0, %%ebx " :: "m" (t)); \
    asm( "movl %%ecx, %0 " : "=m" (c)); \
    asm( "movl %%edi, %0 " : "=m" (d)); \
    asm( "movl %%esi, %0 " : "=m" (s) :: \
    "eax", "ecx", "edx", "esi", "edi" );

#endif /* SSE2 */
#endif /* i386 */

#if defined(__amd64__) || defined (__x86_64__)

#define MULADDC_INIT \
    asm( "movq %0, %%rsi " :: "m" (s)); \
    asm( "movq %0, %%rdi " :: "m" (d)); \
    asm( "movq %0, %%rcx " :: "m" (c)); \
    asm( "movq %0, %%rbx " :: "m" (b)); \
    asm( "xorq %r8, %r8 " );

#define MULADDC_CORE \
    asm( "movq (%rsi),%rax " ); \
    asm( "mulq %rbx " ); \
    asm( "addq $8, %rsi " ); \
    asm( "addq %rcx, %rax " ); \
    asm( "movq %r8, %rcx " ); \
    asm( "adcq $0, %rdx " ); \
    asm( "nop " ); \
    asm( "addq %rax, (%rdi) " ); \
    asm( "adcq %rdx, %rcx " ); \
    asm( "addq $8, %rdi " );

#define MULADDC_STOP \
    asm( "movq %%rcx, %0 " : "=m" (c)); \
    asm( "movq %%rdi, %0 " : "=m" (d)); \
    asm( "movq %%rsi, %0 " : "=m" (s) :: \
    "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" );

#endif /* AMD64 */

#if defined(__mc68020__) || defined(__mcpu32__)

#define MULADDC_INIT \
    asm( "movl %0, %%a2 " :: "m" (s)); \
    asm( "movl %0, %%a3 " :: "m" (d)); \
    asm( "movl %0, %%d3 " :: "m" (c)); \
    asm( "movl %0, %%d2 " :: "m" (b)); \
    asm( "moveq #0, %d0 " );

#define MULADDC_CORE \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "moveq #0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "addxl %d4, %d3 " );

#define MULADDC_STOP \
    asm( "movl %%d3, %0 " : "=m" (c)); \
    asm( "movl %%a3, %0 " : "=m" (d)); \
    asm( "movl %%a2, %0 " : "=m" (s) :: \
    "d0", "d1", "d2", "d3", "d4", "a2", "a3" );

#define MULADDC_HUIT \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d4:%d1 " ); \
    asm( "addxl %d3, %d1 " ); \
    asm( "addxl %d0, %d4 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "movel %a2@+, %d1 " ); \
    asm( "mulul %d2, %d3:%d1 " ); \
    asm( "addxl %d4, %d1 " ); \
    asm( "addxl %d0, %d3 " ); \
    asm( "addl %d1, %a3@+ " ); \
    asm( "addxl %d0, %d3 " );

#endif /* MC68000 */

#if defined(__powerpc__) || defined(__ppc__)
#if defined(__powerpc64__) || defined(__ppc64__)

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT \
    asm( "ld r3, %0 " :: "m" (s)); \
    asm( "ld r4, %0 " :: "m" (d)); \
    asm( "ld r5, %0 " :: "m" (c)); \
    asm( "ld r6, %0 " :: "m" (b)); \
    asm( "addi r3, r3, -8 " ); \
    asm( "addi r4, r4, -8 " ); \
    asm( "addic r5, r5, 0 " );

#define MULADDC_CORE \
    asm( "ldu r7, 8(r3) " ); \
    asm( "mulld r8, r7, r6 " ); \
    asm( "mulhdu r9, r7, r6 " ); \
    asm( "adde r8, r8, r5 " ); \
    asm( "ld r7, 8(r4) " ); \
    asm( "addze r5, r9 " ); \
    asm( "addc r8, r8, r7 " ); \
    asm( "stdu r8, 8(r4) " );

#define MULADDC_STOP \
    asm( "addze r5, r5 " ); \
    asm( "addi r4, r4, 8 " ); \
    asm( "addi r3, r3, 8 " ); \
    asm( "std r5, %0 " : "=m" (c)); \
    asm( "std r4, %0 " : "=m" (d)); \
    asm( "std r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

#define MULADDC_INIT \
    asm( "ld %%r3, %0 " :: "m" (s)); \
    asm( "ld %%r4, %0 " :: "m" (d)); \
    asm( "ld %%r5, %0 " :: "m" (c)); \
    asm( "ld %%r6, %0 " :: "m" (b)); \
    asm( "addi %r3, %r3, -8 " ); \
    asm( "addi %r4, %r4, -8 " ); \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE \
    asm( "ldu %r7, 8(%r3) " ); \
    asm( "mulld %r8, %r7, %r6 " ); \
    asm( "mulhdu %r9, %r7, %r6 " ); \
    asm( "adde %r8, %r8, %r5 " ); \
    asm( "ld %r7, 8(%r4) " ); \
    asm( "addze %r5, %r9 " ); \
    asm( "addc %r8, %r8, %r7 " ); \
    asm( "stdu %r8, 8(%r4) " );

#define MULADDC_STOP \
    asm( "addze %r5, %r5 " ); \
    asm( "addi %r4, %r4, 8 " ); \
    asm( "addi %r3, %r3, 8 " ); \
    asm( "std %%r5, %0 " : "=m" (c)); \
    asm( "std %%r4, %0 " : "=m" (d)); \
    asm( "std %%r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#else /* PPC32 */

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT \
    asm( "lwz r3, %0 " :: "m" (s)); \
    asm( "lwz r4, %0 " :: "m" (d)); \
    asm( "lwz r5, %0 " :: "m" (c)); \
    asm( "lwz r6, %0 " :: "m" (b)); \
    asm( "addi r3, r3, -4 " ); \
    asm( "addi r4, r4, -4 " ); \
    asm( "addic r5, r5, 0 " );

#define MULADDC_CORE \
    asm( "lwzu r7, 4(r3) " ); \
    asm( "mullw r8, r7, r6 " ); \
    asm( "mulhwu r9, r7, r6 " ); \
    asm( "adde r8, r8, r5 " ); \
    asm( "lwz r7, 4(r4) " ); \
    asm( "addze r5, r9 " ); \
    asm( "addc r8, r8, r7 " ); \
    asm( "stwu r8, 4(r4) " );

#define MULADDC_STOP \
    asm( "addze r5, r5 " ); \
    asm( "addi r4, r4, 4 " ); \
    asm( "addi r3, r3, 4 " ); \
    asm( "stw r5, %0 " : "=m" (c)); \
    asm( "stw r4, %0 " : "=m" (d)); \
    asm( "stw r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

#define MULADDC_INIT \
    asm( "lwz %%r3, %0 " :: "m" (s)); \
    asm( "lwz %%r4, %0 " :: "m" (d)); \
    asm( "lwz %%r5, %0 " :: "m" (c)); \
    asm( "lwz %%r6, %0 " :: "m" (b)); \
    asm( "addi %r3, %r3, -4 " ); \
    asm( "addi %r4, %r4, -4 " ); \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE \
    asm( "lwzu %r7, 4(%r3) " ); \
    asm( "mullw %r8, %r7, %r6 " ); \
    asm( "mulhwu %r9, %r7, %r6 " ); \
    asm( "adde %r8, %r8, %r5 " ); \
    asm( "lwz %r7, 4(%r4) " ); \
    asm( "addze %r5, %r9 " ); \
    asm( "addc %r8, %r8, %r7 " ); \
    asm( "stwu %r8, 4(%r4) " );

#define MULADDC_STOP \
    asm( "addze %r5, %r5 " ); \
    asm( "addi %r4, %r4, 4 " ); \
    asm( "addi %r3, %r3, 4 " ); \
    asm( "stw %%r5, %0 " : "=m" (c)); \
    asm( "stw %%r4, %0 " : "=m" (d)); \
    asm( "stw %%r3, %0 " : "=m" (s) :: \
    "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#endif /* PPC32 */
#endif /* PPC64 */

#if defined(__sparc__)

#define MULADDC_INIT \
    asm( "ld %0, %%o0 " :: "m" (s)); \
    asm( "ld %0, %%o1 " :: "m" (d)); \
    asm( "ld %0, %%o2 " :: "m" (c)); \
    asm( "ld %0, %%o3 " :: "m" (b));

#define MULADDC_CORE \
    asm( "ld [%o0], %o4 " ); \
    asm( "inc 4, %o0 " ); \
    asm( "ld [%o1], %o5 " ); \
    asm( "umul %o3, %o4, %o4 " ); \
    asm( "addcc %o4, %o2, %o4 " ); \
    asm( "rd %y, %g1 " ); \
    asm( "addx %g1, 0, %g1 " ); \
    asm( "addcc %o4, %o5, %o4 " ); \
    asm( "st %o4, [%o1] " ); \
    asm( "addx %g1, 0, %o2 " ); \
    asm( "inc 4, %o1 " );

#define MULADDC_STOP \
    asm( "st %%o2, %0 " : "=m" (c)); \
    asm( "st %%o1, %0 " : "=m" (d)); \
    asm( "st %%o0, %0 " : "=m" (s) :: \
    "g1", "o0", "o1", "o2", "o3", "o4", "o5" );

#endif /* SPARCv8 */

#if defined(__microblaze__) || defined(microblaze)

#define MULADDC_INIT \
    asm( "lwi r3, %0 " :: "m" (s)); \
    asm( "lwi r4, %0 " :: "m" (d)); \
    asm( "lwi r5, %0 " :: "m" (c)); \
    asm( "lwi r6, %0 " :: "m" (b)); \
    asm( "andi r7, r6, 0xffff" ); \
    asm( "bsrli r6, r6, 16 " );

#define MULADDC_CORE \
    asm( "lhui r8, r3, 0 " ); \
    asm( "addi r3, r3, 2 " ); \
    asm( "lhui r9, r3, 0 " ); \
    asm( "addi r3, r3, 2 " ); \
    asm( "mul r10, r9, r6 " ); \
    asm( "mul r11, r8, r7 " ); \
    asm( "mul r12, r9, r7 " ); \
    asm( "mul r13, r8, r6 " ); \
    asm( "bsrli r8, r10, 16 " ); \
    asm( "bsrli r9, r11, 16 " ); \
    asm( "add r13, r13, r8 " ); \
    asm( "add r13, r13, r9 " ); \
    asm( "bslli r10, r10, 16 " ); \
    asm( "bslli r11, r11, 16 " ); \
    asm( "add r12, r12, r10 " ); \
    asm( "addc r13, r13, r0 " ); \
    asm( "add r12, r12, r11 " ); \
    asm( "addc r13, r13, r0 " ); \
    asm( "lwi r10, r4, 0 " ); \
    asm( "add r12, r12, r10 " ); \
    asm( "addc r13, r13, r0 " ); \
    asm( "add r12, r12, r5 " ); \
    asm( "addc r5, r13, r0 " ); \
    asm( "swi r12, r4, 0 " ); \
    asm( "addi r4, r4, 4 " );

#define MULADDC_STOP \
    asm( "swi r5, %0 " : "=m" (c)); \
    asm( "swi r4, %0 " : "=m" (d)); \
    asm( "swi r3, %0 " : "=m" (s) :: \
     "r3", "r4" , "r5" , "r6" , "r7" , "r8" , \
     "r9", "r10", "r11", "r12", "r13" );

#endif /* MicroBlaze */

#if defined(__tricore__)

#define MULADDC_INIT \
    asm( "ld.a %%a2, %0 " :: "m" (s)); \
    asm( "ld.a %%a3, %0 " :: "m" (d)); \
    asm( "ld.w %%d4, %0 " :: "m" (c)); \
    asm( "ld.w %%d1, %0 " :: "m" (b)); \
    asm( "xor %d5, %d5 " );

#define MULADDC_CORE \
    asm( "ld.w %d0, [%a2+] " ); \
    asm( "madd.u %e2, %e4, %d0, %d1 " ); \
    asm( "ld.w %d0, [%a3] " ); \
    asm( "addx %d2, %d2, %d0 " ); \
    asm( "addc %d3, %d3, 0 " ); \
    asm( "mov %d4, %d3 " ); \
    asm( "st.w [%a3+], %d2 " );

#define MULADDC_STOP \
    asm( "st.w %0, %%d4 " : "=m" (c)); \
    asm( "st.a %0, %%a3 " : "=m" (d)); \
    asm( "st.a %0, %%a2 " : "=m" (s) :: \
    "d0", "d1", "e2", "d4", "a2", "a3" );

#endif /* TriCore */

#if defined(__arm__)

#define MULADDC_INIT \
    asm( "ldr r0, %0 " :: "m" (s)); \
    asm( "ldr r1, %0 " :: "m" (d)); \
    asm( "ldr r2, %0 " :: "m" (c)); \
    asm( "ldr r3, %0 " :: "m" (b));

#define MULADDC_CORE \
    asm( "ldr r4, [r0], #4 " ); \
    asm( "mov r5, #0 " ); \
    asm( "ldr r6, [r1] " ); \
    asm( "umlal r2, r5, r3, r4 " ); \
    asm( "adds r7, r6, r2 " ); \
    asm( "adc r2, r5, #0 " ); \
    asm( "str r7, [r1], #4 " );

#define MULADDC_STOP \
    asm( "str r2, %0 " : "=m" (c)); \
    asm( "str r1, %0 " : "=m" (d)); \
    asm( "str r0, %0 " : "=m" (s) :: \
    "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7" );

#endif /* ARMv3 */

#if defined(__alpha__)

#define MULADDC_INIT \
    asm( "ldq $1, %0 " :: "m" (s)); \
    asm( "ldq $2, %0 " :: "m" (d)); \
    asm( "ldq $3, %0 " :: "m" (c)); \
    asm( "ldq $4, %0 " :: "m" (b));

#define MULADDC_CORE \
    asm( "ldq $6, 0($1) " ); \
    asm( "addq $1, 8, $1 " ); \
    asm( "mulq $6, $4, $7 " ); \
    asm( "umulh $6, $4, $6 " ); \
    asm( "addq $7, $3, $7 " ); \
    asm( "cmpult $7, $3, $3 " ); \
    asm( "ldq $5, 0($2) " ); \
    asm( "addq $7, $5, $7 " ); \
    asm( "cmpult $7, $5, $5 " ); \
    asm( "stq $7, 0($2) " ); \
    asm( "addq $2, 8, $2 " ); \
    asm( "addq $6, $3, $3 " ); \
    asm( "addq $5, $3, $3 " );

#define MULADDC_STOP \
    asm( "stq $3, %0 " : "=m" (c)); \
    asm( "stq $2, %0 " : "=m" (d)); \
    asm( "stq $1, %0 " : "=m" (s) :: \
    "$1", "$2", "$3", "$4", "$5", "$6", "$7" );

#endif /* Alpha */

#if defined(__mips__)

#define MULADDC_INIT \
    asm( "lw $10, %0 " :: "m" (s)); \
    asm( "lw $11, %0 " :: "m" (d)); \
    asm( "lw $12, %0 " :: "m" (c)); \
    asm( "lw $13, %0 " :: "m" (b));

#define MULADDC_CORE \
    asm( "lw $14, 0($10) " ); \
    asm( "multu $13, $14 " ); \
    asm( "addi $10, $10, 4 " ); \
    asm( "mflo $14 " ); \
    asm( "mfhi $9 " ); \
    asm( "addu $14, $12, $14 " ); \
    asm( "lw $15, 0($11) " ); \
    asm( "sltu $12, $14, $12 " ); \
    asm( "addu $15, $14, $15 " ); \
    asm( "sltu $14, $15, $14 " ); \
    asm( "addu $12, $12, $9 " ); \
    asm( "sw $15, 0($11) " ); \
    asm( "addu $12, $12, $14 " ); \
    asm( "addi $11, $11, 4 " );

#define MULADDC_STOP \
    asm( "sw $12, %0 " : "=m" (c)); \
    asm( "sw $11, %0 " : "=m" (d)); \
    asm( "sw $10, %0 " : "=m" (s) :: \
    "$9", "$10", "$11", "$12", "$13", "$14", "$15" );

#endif /* MIPS */
#endif /* GNUC */

#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)

#define MULADDC_INIT \
    __asm mov esi, s \
    __asm mov edi, d \
    __asm mov ecx, c \
    __asm mov ebx, b

#define MULADDC_CORE \
    __asm lodsd \
    __asm mul ebx \
    __asm add eax, ecx \
    __asm adc edx, 0 \
    __asm add eax, [edi] \
    __asm adc edx, 0 \
    __asm mov ecx, edx \
    __asm stosd

#if defined(POLARSSL_HAVE_SSE2)

#define EMIT __asm _emit

#define MULADDC_HUIT \
    EMIT 0x0F EMIT 0x6E EMIT 0xC9 \
    EMIT 0x0F EMIT 0x6E EMIT 0xC3 \
    EMIT 0x0F EMIT 0x6E EMIT 0x1F \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x16 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xEE \
    EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xFC \
    EMIT 0x0F EMIT 0x7E EMIT 0x0F \
    EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCD \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCF \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDD \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCE \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \
    EMIT 0x83 EMIT 0xC7 EMIT 0x20 \
    EMIT 0x83 EMIT 0xC6 EMIT 0x20 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x7E EMIT 0xC9

#define MULADDC_STOP \
    EMIT 0x0F EMIT 0x77 \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi \

#else

#define MULADDC_STOP \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi \

#endif /* SSE2 */
#endif /* MSVC */

#endif /* POLARSSL_HAVE_ASM */

#if !defined(MULADDC_CORE)
#if defined(POLARSSL_HAVE_LONGLONG)

#define MULADDC_INIT \
{ \
    t_dbl r; \
    t_int r0, r1;

#define MULADDC_CORE \
    r = *(s++) * (t_dbl) b; \
    r0 = r; \
    r1 = r >> biL; \
    r0 += c; r1 += (r0 < c); \
    r0 += *d; r1 += (r0 < *d); \
    c = r1; *(d++) = r0;

#define MULADDC_STOP \
}

#else
#define MULADDC_INIT \
{ \
    t_int s0, s1, b0, b1; \
    t_int r0, r1, rx, ry; \
    b0 = ( b << biH ) >> biH; \
    b1 = ( b >> biH );

#define MULADDC_CORE \
    s0 = ( *s << biH ) >> biH; \
    s1 = ( *s >> biH ); s++; \
    rx = s0 * b1; r0 = s0 * b0; \
    ry = s1 * b0; r1 = s1 * b1; \
    r1 += ( rx >> biH ); \
    r1 += ( ry >> biH ); \
    rx <<= biH; ry <<= biH; \
    r0 += rx; r1 += (r0 < rx); \
    r0 += ry; r1 += (r0 < ry); \
    r0 += c; r1 += (r0 < c); \
    r0 += *d; r1 += (r0 < *d); \
    c = r1; *(d++) = r0;

#define MULADDC_STOP \
}

#endif /* C (generic) */
#endif /* C (longlong) */

#endif /* bn_mul.h */
