Root/plasma/kernel/math.c

1/*--------------------------------------------------------------------
2 * TITLE: Plasma Floating Point Library
3 * AUTHOR: Steve Rhoads (rhoadss@yahoo.com)
4 * DATE CREATED: 3/2/06
5 * FILENAME: math.c
6 * PROJECT: Plasma CPU core
7 * COPYRIGHT: Software placed into the public domain by the author.
8 * Software 'as is' without warranty. Author liable for nothing.
9 * DESCRIPTION:
10 * Plasma Floating Point Library
11 *--------------------------------------------------------------------
12 * IEEE_fp = sign(1) | exponent(8) | fraction(23)
13 * cos(x)=1-x^2/2!+x^4/4!-x^6/6!+...
14 * exp(x)=1+x+x^2/2!+x^3/3!+...
15 * ln(1+x)=x-x^2/2+x^3/3-x^4/4+...
16 * atan(x)=x-x^3/3+x^5/5-x^7/7+...
17 * pow(x,y)=exp(y*ln(x))
18 * x=tan(a+b)=(tan(a)+tan(b))/(1-tan(a)*tan(b))
19 * atan(x)=b+atan((x-atan(b))/(1+x*atan(b)))
20 * ln(a*x)=ln(a)+ln(x); ln(x^n)=n*ln(x)
21 *--------------------------------------------------------------------*/
22#include "rtos.h"
23
24//#define USE_SW_MULT
25#if !defined(WIN32) && !defined(USE_SW_MULT)
26#define USE_MULT64
27#endif
28
29#define PI ((float)3.1415926)
30#define PI_2 ((float)(PI/2.0))
31#define PI2 ((float)(PI*2.0))
32
33#define FtoL(X) (*(unsigned long*)&(X))
34#define LtoF(X) (*(float*)&(X))
35
36
37float FP_Neg(float a_fp)
38{
39   unsigned long a;
40   a = FtoL(a_fp);
41   a ^= 0x80000000;
42   return LtoF(a);
43}
44
45
46float FP_Add(float a_fp, float b_fp)
47{
48   unsigned long a, b, c;
49   unsigned long as, bs, cs; //sign
50   long ae, af, be, bf, ce, cf; //exponent and fraction
51   a = FtoL(a_fp);
52   b = FtoL(b_fp);
53   as = a >> 31; //sign
54   ae = (a >> 23) & 0xff; //exponent
55   af = 0x00800000 | (a & 0x007fffff); //fraction
56   bs = b >> 31;
57   be = (b >> 23) & 0xff;
58   bf = 0x00800000 | (b & 0x007fffff);
59   if(ae > be)
60   {
61      if(ae - be < 30)
62         bf >>= ae - be;
63      else
64         bf = 0;
65      ce = ae;
66   }
67   else
68   {
69      if(be - ae < 30)
70         af >>= be - ae;
71      else
72         af = 0;
73      ce = be;
74   }
75   cf = (as ? -af : af) + (bs ? -bf : bf);
76   cs = cf < 0;
77   cf = cf>=0 ? cf : -cf;
78   if(cf == 0)
79      return LtoF(cf);
80   while(cf & 0xff000000)
81   {
82      ++ce;
83      cf >>= 1;
84   }
85   while((cf & 0xff800000) == 0)
86   {
87      --ce;
88      cf <<= 1;
89   }
90   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
91   if(ce < 1)
92      c = 0;
93   return LtoF(c);
94}
95
96
97float FP_Sub(float a_fp, float b_fp)
98{
99   return FP_Add(a_fp, FP_Neg(b_fp));
100}
101
102
103float FP_Mult(float a_fp, float b_fp)
104{
105   unsigned long a, b, c;
106   unsigned long as, af, bs, bf, cs, cf;
107   long ae, be, ce;
108#ifndef USE_MULT64
109   unsigned long a2, a1, b2, b1, med1, med2;
110#endif
111   unsigned long hi, lo;
112   a = FtoL(a_fp);
113   b = FtoL(b_fp);
114   as = a >> 31;
115   ae = (a >> 23) & 0xff;
116   af = 0x00800000 | (a & 0x007fffff);
117   bs = b >> 31;
118   be = (b >> 23) & 0xff;
119   bf = 0x00800000 | (b & 0x007fffff);
120   cs = as ^ bs;
121#ifndef USE_MULT64
122   a1 = af & 0xffff;
123   a2 = af >> 16;
124   b1 = bf & 0xffff;
125   b2 = bf >> 16;
126   lo = a1 * b1;
127   med1 = a2 * b1 + (lo >> 16);
128   med2 = a1 * b2;
129   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
130   med1 = (med1 & 0xffff) + (med2 & 0xffff);
131   hi += (med1 >> 16);
132   lo = (med1 << 16) | (lo & 0xffff);
133#else
134   lo = OS_AsmMult(af, bf, &hi);
135#endif
136   cf = (hi << 9) | (lo >> 23);
137   ce = ae + be - 0x80 + 1;
138   if(cf == 0)
139      return LtoF(cf);
140   while(cf & 0xff000000)
141   {
142      ++ce;
143      cf >>= 1;
144   }
145   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
146   if(ce < 1)
147      c = 0;
148   return LtoF(c);
149}
150
151
152float FP_Div(float a_fp, float b_fp)
153{
154   unsigned long a, b, c;
155   unsigned long as, af, bs, bf, cs, cf;
156   unsigned long a1, b1;
157#ifndef USE_MULT64
158   unsigned long a2, b2, med1, med2;
159#endif
160   unsigned long hi, lo;
161   long ae, be, ce, d;
162   a = FtoL(a_fp);
163   b = FtoL(b_fp);
164   as = a >> 31;
165   ae = (a >> 23) & 0xff;
166   af = 0x00800000 | (a & 0x007fffff);
167   bs = b >> 31;
168   be = (b >> 23) & 0xff;
169   bf = 0x00800000 | (b & 0x007fffff);
170   cs = as ^ bs;
171   ce = ae - (be - 0x80) + 6 - 8;
172   a1 = af << 4; //8
173   b1 = bf >> 8;
174   cf = a1 / b1;
175   cf <<= 12; //8
176#if 1 /*non-quick*/
177#ifndef USE_MULT64
178   a1 = cf & 0xffff;
179   a2 = cf >> 16;
180   b1 = bf & 0xffff;
181   b2 = bf >> 16;
182   lo = a1 * b1;
183   med1 =a2 * b1 + (lo >> 16);
184   med2 = a1 * b2;
185   hi = a2 * b2 + (med1 >> 16) + (med2 >> 16);
186   med1 = (med1 & 0xffff) + (med2 & 0xffff);
187   hi += (med1 >> 16);
188   lo = (med1 << 16) | (lo & 0xffff);
189#else
190   lo = OS_AsmMult(cf, bf, &hi);
191#endif
192   lo = (hi << 8) | (lo >> 24);
193   d = af - lo; //remainder
194   assert(-0xffff < d && d < 0xffff);
195   d <<= 16;
196   b1 = bf >> 8;
197   d = d / (long)b1;
198   cf += d;
199#endif
200   if(cf == 0)
201      return LtoF(cf);
202   while(cf & 0xff000000)
203   {
204      ++ce;
205      cf >>= 1;
206   }
207   if(ce < 0)
208      ce = 0;
209   c = (cs << 31) | (ce << 23) | (cf & 0x007fffff);
210   if(ce < 1)
211      c = 0;
212   return LtoF(c);
213}
214
215
216long FP_ToLong(float a_fp)
217{
218   unsigned long a;
219   unsigned long as;
220   long ae;
221   long af, shift;
222   a = FtoL(a_fp);
223   as = a >> 31;
224   ae = (a >> 23) & 0xff;
225   af = 0x00800000 | (a & 0x007fffff);
226   af <<= 7;
227   shift = -(ae - 0x80 - 29);
228   if(shift > 0)
229   {
230      if(shift < 31)
231         af >>= shift;
232      else
233         af = 0;
234   }
235   af = as ? -af: af;
236   return af;
237}
238
239
240float FP_ToFloat(long af)
241{
242   unsigned long a;
243   unsigned long as, ae;
244   as = af>=0 ? 0: 1;
245   af = af>=0 ? af: -af;
246   ae = 0x80 + 22;
247   if(af == 0)
248      return LtoF(af);
249   while(af & 0xff000000)
250   {
251      ++ae;
252      af >>= 1;
253   }
254   while((af & 0xff800000) == 0)
255   {
256      --ae;
257      af <<= 1;
258   }
259   a = (as << 31) | (ae << 23) | (af & 0x007fffff);
260   return LtoF(a);
261}
262
263
264//0 iff a==b; 1 iff a>b; -1 iff a<b
265int FP_Cmp(float a_fp, float b_fp)
266{
267   unsigned long a, b;
268   unsigned long as, ae, af, bs, be, bf;
269   int gt;
270   a = FtoL(a_fp);
271   b = FtoL(b_fp);
272   if(a == b)
273      return 0;
274   as = a >> 31;
275   bs = b >> 31;
276   if(as > bs)
277      return -1;
278   if(as < bs)
279      return 1;
280   gt = as ? -1 : 1;
281   ae = (a >> 23) & 0xff;
282   be = (b >> 23) & 0xff;
283   if(ae > be)
284      return gt;
285   if(ae < be)
286      return -gt;
287   af = 0x00800000 | (a & 0x007fffff);
288   bf = 0x00800000 | (b & 0x007fffff);
289   if(af > bf)
290      return gt;
291   return -gt;
292}
293
294
295int __ltsf2(float a, float b)
296{
297   return FP_Cmp(a, b);
298}
299
300int __lesf2(float a, float b)
301{
302   return FP_Cmp(a, b);
303}
304
305int __gtsf2(float a, float b)
306{
307   return FP_Cmp(a, b);
308}
309
310int __gesf2(float a, float b)
311{
312   return FP_Cmp(a, b);
313}
314
315int __eqsf2(float a, float b)
316{
317   return FtoL(a) != FtoL(b);
318}
319
320int __nesf2(float a, float b)
321{
322   return FtoL(a) != FtoL(b);
323}
324
325
326float FP_Sqrt(float a)
327{
328   float x1, y1, x2, y2, x3;
329   long i;
330   x1 = FP_ToFloat(1);
331   y1 = FP_Sub(FP_Mult(x1, x1), a); //y1=x1*x1-a;
332   x2 = FP_ToFloat(100);
333   y2 = FP_Sub(FP_Mult(x2, x2), a);
334   for(i = 0; i < 10; ++i)
335   {
336      if(FtoL(y1) == FtoL(y2))
337         return x2;
338      //x3=x2-(x1-x2)*y2/(y1-y2);
339      x3 = FP_Sub(x2, FP_Div(FP_Mult(FP_Sub(x1, x2), y2), FP_Sub(y1, y2)));
340      x1 = x2;
341      y1 = y2;
342      x2 = x3;
343      y2 = FP_Sub(FP_Mult(x2, x2), a);
344   }
345   return x2;
346}
347
348
349float FP_Cos(float rad)
350{
351   int n;
352   float answer, x2, top, bottom, sign;
353   while(FP_Cmp(rad, PI2) > 0)
354      rad = FP_Sub(rad, PI2);
355   while(FP_Cmp(rad, (float)0.0) < 0)
356      rad = FP_Add(rad, PI2);
357   answer = 1.0;
358   sign = 1.0;
359   if(FP_Cmp(rad, PI) >= 0)
360   {
361      rad = FP_Sub(rad, PI);
362      sign = FP_ToFloat(-1);
363   }
364   if(FP_Cmp(rad, PI_2) >= 0)
365   {
366      rad = FP_Sub(PI, rad);
367      sign = FP_Neg(sign);
368   }
369   x2 = FP_Mult(rad, rad);
370   top = 1.0;
371   bottom = 1.0;
372   for(n = 2; n < 12; n += 2)
373   {
374      top = FP_Mult(top, FP_Neg(x2));
375      bottom = FP_Mult(bottom, FP_ToFloat((n - 1) * n));
376      answer = FP_Add(answer, FP_Div(top, bottom));
377   }
378   return FP_Mult(answer, sign);
379}
380
381
382float FP_Sin(float rad)
383{
384   const float pi_2=(float)(PI/2.0);
385   return FP_Cos(FP_Sub(rad, pi_2));
386}
387
388
389float FP_Atan(float x)
390{
391   const float b=(float)(PI/8.0);
392   const float atan_b=(float)0.37419668; //atan(b);
393   int n;
394   float answer, x2, top;
395   if(FP_Cmp(x, (float)0.0) >= 0)
396   {
397      if(FP_Cmp(x, (float)1.0) > 0)
398         return FP_Sub(PI_2, FP_Atan(FP_Div((float)1.0, x)));
399   }
400   else
401   {
402      if(FP_Cmp(x, (float)-1.0) > 0)
403         return FP_Sub(-PI_2, FP_Atan(FP_Div((float)1.0, x)));
404   }
405   if(FP_Cmp(x, (float)0.45) > 0)
406   {
407      //answer = (x - atan_b) / (1 + x * atan_b);
408      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
409      //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME fudge?*/
410      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
411      return answer;
412   }
413   if(FP_Cmp(x, (float)-0.45) < 0)
414   {
415      x = FP_Neg(x);
416      //answer = (x - atan_b) / (1 + x * atan_b);
417      answer = FP_Div(FP_Sub(x, atan_b), FP_Add(1.0, FP_Mult(x, atan_b)));
418      //answer = b + FP_Atan(answer) - (float)0.034633; /*FIXME*/
419      answer = FP_Sub(FP_Add(b, FP_Atan(answer)), (float)0.034633);
420      return FP_Neg(answer);
421   }
422   answer = x;
423   x2 = FP_Mult(FP_Neg(x), x);
424   top = x;
425   for(n = 3; n < 14; n += 2)
426   {
427      top = FP_Mult(top, x2);
428      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
429   }
430   return answer;
431}
432
433
434float FP_Atan2(float y, float x)
435{
436   float answer,r;
437   r = y / x;
438   answer = FP_Atan(r);
439   if(FP_Cmp(x, (float)0.0) < 0)
440   {
441      if(FP_Cmp(y, (float)0.0) > 0)
442         answer = FP_Add(answer, PI);
443      else
444         answer = FP_Sub(answer, PI);
445   }
446   return answer;
447}
448
449
450float FP_Exp(float x)
451{
452   const float e2=(float)7.389056099;
453   const float inv_e2=(float)0.135335283;
454   float answer, top, bottom, mult;
455   int n;
456
457   mult = 1.0;
458   while(FP_Cmp(x, (float)2.0) > 0)
459   {
460      mult = FP_Mult(mult, e2);
461      x = FP_Add(x, (float)-2.0);
462   }
463   while(FP_Cmp(x, (float)-2.0) < 0)
464   {
465      mult = FP_Mult(mult, inv_e2);
466      x = FP_Add(x, (float)2.0);
467   }
468   answer = FP_Add((float)1.0, x);
469   top = x;
470   bottom = 1.0;
471   for(n = 2; n < 15; ++n)
472   {
473      top = FP_Mult(top, x);
474      bottom = FP_Mult(bottom, FP_ToFloat(n));
475      answer = FP_Add(answer, FP_Div(top, bottom));
476   }
477   return FP_Mult(answer, mult);
478}
479
480
481float FP_Log(float x)
482{
483   const float log_2=(float)0.69314718; /*log(2.0)*/
484   int n;
485   float answer, top, add;
486   add = 0.0;
487   while(FP_Cmp(x, 16.0) > 0)
488   {
489      x = FP_Mult(x, (float)0.0625);
490      add = FP_Add(add, (float)(log_2 * 4));
491   }
492   while(FP_Cmp(x, 1.5) > 0)
493   {
494      x = FP_Mult(x, 0.5);
495      add = FP_Add(add, log_2);
496   }
497   while(FP_Cmp(x, 0.5) < 0)
498   {
499      x = FP_Mult(x, 2.0);
500      add = FP_Sub(add, log_2);
501   }
502   x = FP_Sub(x, 1.0);
503   answer = 0.0;
504   top = -1.0;
505   for(n = 1; n < 14; ++n)
506   {
507      top = FP_Mult(top, FP_Neg(x));
508      answer = FP_Add(answer, FP_Div(top, FP_ToFloat(n)));
509   }
510   return FP_Add(answer, add);
511}
512
513
514float FP_Pow(float x, float y)
515{
516   return FP_Exp(y * FP_Log(x));
517}
518
519
520/********************************************/
521//These five functions will only be used if the flag "-mno-mul" is enabled
522#ifdef USE_SW_MULT
523unsigned long __mulsi3(unsigned long a, unsigned long b)
524{
525   unsigned long answer = 0;
526   while(b)
527   {
528      if(b & 1)
529         answer += a;
530      a <<= 1;
531      b >>= 1;
532   }
533   return answer;
534}
535
536
537static unsigned long DivideMod(unsigned long a, unsigned long b, int doMod)
538{
539   unsigned long upper=a, lower=0;
540   int i;
541   a = b << 31;
542   for(i = 0; i < 32; ++i)
543   {
544      lower = lower << 1;
545      if(upper >= a && a && b < 2)
546      {
547         upper = upper - a;
548         lower |= 1;
549      }
550      a = ((b&2) << 30) | (a >> 1);
551      b = b >> 1;
552   }
553   if(!doMod)
554      return lower;
555   return upper;
556}
557
558
559unsigned long __udivsi3(unsigned long a, unsigned long b)
560{
561   return DivideMod(a, b, 0);
562}
563
564
565long __divsi3(long a, long b)
566{
567   long answer, negate=0;
568   if(a < 0)
569   {
570      a = -a;
571      negate = !negate;
572   }
573   if(b < 0)
574   {
575      b = -b;
576      negate = !negate;
577   }
578   answer = DivideMod(a, b, 0);
579   if(negate)
580      answer = -answer;
581   return answer;
582}
583
584
585unsigned long __umodsi3(unsigned long a, unsigned long b)
586{
587   return DivideMod(a, b, 1);
588}
589#endif
590
591
592/*************** Test *****************/
593#ifdef WIN32
594#undef _LIBC
595#include <math.h>
596#undef printf
597#undef getch
598int printf(const char *, ...);
599struct {
600   char *name;
601   float low, high;
602   double (*func1)(double);
603   float (*func2)(float);
604} test_info[]={
605   {"cos", -2*PI, 2*PI, cos, FP_Cos},
606   {"sin", -2*PI, 2*PI, sin, FP_Sin},
607   {"atan", -3.0, 2.0, atan, FP_Atan},
608   {"log", (float)0.01, (float)4.0, log, FP_Log},
609   {"exp", (float)-5.01, (float)30.0, exp, FP_Exp},
610   {"sqrt", (float)0.01, (float)1000.0, sqrt, FP_Sqrt}
611};
612
613
614void TestMathFull(void)
615{
616   float a, b, c, d;
617   float error1, error2, error3, error4, error5;
618   int test;
619
620   a = PI * PI;
621   b = PI;
622   c = FP_Div(a, b);
623   printf("%10f %10f %10f %10f %10f\n",
624      (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
625   a = a * 200;
626   for(b = -(float)2.718281828*100; b < 300; b += (float)23.678)
627   {
628      c = FP_Div(a, b);
629      d = a / b - c;
630      printf("%10f %10f %10f %10f %10f\n",
631         (double)a, (double)b, (double)(a/b), (double)c, (double)(a/b-c));
632   }
633   //getch();
634
635   for(test = 0; test < 6; ++test)
636   {
637      printf("\nTesting %s\n", test_info[test].name);
638      for(a = test_info[test].low;
639          a <= test_info[test].high;
640          a += (test_info[test].high-test_info[test].low)/(float)20.0)
641      {
642         b = (float)test_info[test].func1(a);
643         c = test_info[test].func2(a);
644         d = b - c;
645         printf("%s %10f %10f %10f %10f\n", test_info[test].name, a, b, c, d);
646      }
647      //getch();
648   }
649
650   a = FP_ToFloat((long)6.0);
651   b = FP_ToFloat((long)2.0);
652   printf("%f %f\n", (double)a, (double)b);
653   c = FP_Add(a, b);
654   printf("add %f %f\n", (double)(a + b), (double)c);
655   c = FP_Sub(a, b);
656   printf("sub %f %f\n", (double)(a - b), (double)c);
657   c = FP_Mult(a, b);
658   printf("mult %f %f\n", (double)(a * b), (double)c);
659   c = FP_Div(a, b);
660   printf("div %f %f\n", (double)(a / b), (double)c);
661   //getch();
662
663   for(a = (float)-13756.54; a < (float)17400.0; a += (float)64.45)
664   {
665      for(b = (float)-875.36; b < (float)935.8; b += (float)36.7)
666      {
667         error1 = (float)1.0 - (a + b) / FP_Add(a, b);
668         error2 = (float)1.0 - (a * b) / FP_Mult(a, b);
669         error3 = (float)1.0 - (a / b) / FP_Div(a, b);
670         error4 = (float)1.0 - a / FP_ToFloat(FP_ToLong(a));
671         error5 = error1 + error2 + error3 + error4;
672         if(error5 < 0.00005)
673            continue;
674         printf("ERROR!\n");
675         printf("a=%f b=%f\n", (double)a, (double)b);
676         printf(" a+b=%f %f\n", (double)(a+b), (double)FP_Add(a, b));
677         printf(" a*b=%f %f\n", (double)(a*b), (double)FP_Mult(a, b));
678         printf(" a/b=%f %f\n", (double)(a/b), (double)FP_Div(a, b));
679         printf(" a=%f %ld %f\n", (double)a, FP_ToLong(a),
680                                   (double)FP_ToFloat((long)a));
681         printf(" %f %f %f %f\n", (double)error1, (double)error2,
682            (double)error3, (double)error4);
683         //if(error5 > 0.001)
684         // getch();
685      }
686   }
687   printf("done.\n");
688   //getch();
689}
690#endif
691
692
693

Archive Download this file

Branches:
master



interactive