shithub: mc

Download patch

ref: 56741fafc347f664a0ef0aebae51054b544891ed
parent: 5a95abf7cc557ebccb3aa80c8ce9679eb82dcaa1
author: S. Gilles <[email protected]>
date: Tue Jun 26 08:18:27 EDT 2018

Update generators for constants.

Now that the tables of triples have finished calculating, check in
the final version.

--- a/lib/math/ancillary/KM06-calc.gp
+++ b/lib/math/ancillary/KM06-calc.gp
@@ -1,5 +1,7 @@
-# Implementations of some functions from [KM06]. The exact ranges
-# were applied by manual fiddling.
+/*
+ Implementations of some functions from [KM06]. The exact ranges
+ were applied by manual fiddling.
+ */
 
 { betap(amin, amax, p, n) = my(l, s);
         l = amax^(1/p);
--- /dev/null
+++ b/lib/math/ancillary/generate-minimax-by-Remez.gp
@@ -1,0 +1,127 @@
+/*
+  Attempts to find a minimax polynomial of degree n for f via Remez
+  algorithm.
+ */
+
+{ chebyshev_node(a, b, k, n) = my(p, m, c);
+        p = (b + a)/2;
+        m = (b - a)/2;
+        c = cos(Pi * (2*k - 1)/(2*n));
+        return(p + m*c);
+}
+
+{ remez_step(f, n, a, b, x) = my(M, xx, bvec, k);
+        M = matrix(n + 2, n + 2);
+        bvec = vector(n + 2);
+        for (k = 1, n + 2,
+                xx = x[k];
+                for (j = 1, n + 1,
+                        M[k,j] = xx^(j - 1);
+                );
+                M[k, n + 2] = (-1)^k;
+                bvec[k] = f(xx);
+        );
+        return(mattranspose(M^-1 * mattranspose(bvec)));
+}
+
+{ p_eval(n, v, x) = my(s, k);
+        s = 0;
+        for (k = 1, n + 1,
+                s = s + v[k]*x^(k - 1)
+        );
+
+        return(s);
+}
+
+{ err(f, n, v, x) =
+        return(abs(f(x) - p_eval(n, v, x)));
+}
+
+{ find_M(f, n, v, a, b) = my(X, gran, l, lnext, len, xprev, xcur, xnext, yprev, ycur, ynext, thisa, thisb, k, j);
+        gran = 100 * n;
+        l = listcreate();
+
+        xprev = a - (b - a)/gran;
+        yprev = err(f, n, v, xprev);
+
+        xcur = a;
+        ycur = err(f, n, v, xprev);
+        
+        xnext = a + (b - a)/gran;
+        ynext = err(f, n, v, xprev);
+
+        for (k = 2, gran,
+                xprev = xcur;
+                yprev = ycur;
+                
+                xcur = xnext;
+                ycur = ynext;
+
+                xnext = a + k*(b - a)/gran;
+                ynext = err(f, n, v, xnext);
+
+                if(ycur > yprev && ycur > ynext, listput(l, [xcur, abs(ycur)]),);
+        );
+
+        vecsort(l, 2);
+        if(length(l) < n + 2 || l[1][2] < 2^(-100), return("q"),);
+        len = length(l);
+
+        lnext = listcreate();
+        for(j = 1, n + 2,
+                thisa = l[j][1] - (b-a)/gran;
+                thisb = l[j][1] + (b-a)/gran;
+
+                xprev = thisa - (thisb - a)/gran;
+                yprev = err(f, n, v, xprev);
+
+                xcur = thisa;
+                ycur = err(f, n, v, xprev);
+        
+                xnext = thisa + (thisb - thisa)/gran;
+                ynext = err(f, n, v, xprev);
+
+                for (k = 2, gran,
+                        xprev = xcur;
+                        yprev = ycur;
+                
+                        xcur = xnext;
+                        ycur = ynext;
+
+                        xnext = thisa + k*(thisb - thisa)/gran;
+                        ynext = abs(f(xnext) - p_eval(n, v, xnext));
+
+                        if(ycur > yprev && ycur > ynext, listput(lnext, xcur),);
+                );
+        );
+        vecsort(lnext, cmp);
+        listkill(l);
+        X = vector(n + 2);
+        for (k = 1, min(n + 2, length(lnext)), X[k] = lnext[k]);
+        listkill(lnext);
+        vecsort(X);
+        return(X);
+}
+
+{ find_minimax(f, n, a, b) = my(c, k, j);
+        c = vector(n + 2);
+        for (k = 1, n + 2,
+                c[k] = chebyshev_node(a, b, k, n + 2);
+        );
+        for(j = 1, 100,
+                v = remez_step(f, n, a, b, c);
+                print("v = ", v);
+                c = find_M(f, n, v, a, b);
+                if(c == "q", return,);
+                print("c = ", c);
+        );
+}
+
+print("\n");
+print("Minimaxing sin(x), degree 7, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
+find_minimax(sin, 7, -Pi/1024, Pi/1024)
+print("\n");
+print("Minimaxing cos(x), degree 7, on [-Pi/(4 * 256), Pi/(4 * 256)]:");
+find_minimax(cos, 7, -Pi/1024, Pi/1024)
+print("\n");
+
--- a/lib/math/ancillary/generate-triples-for-GB91.c
+++ b/lib/math/ancillary/generate-triples-for-GB91.c
@@ -1,4 +1,4 @@
-/* cc -o generate-triples-for-GB91 generate-triples-for-GB91.c -lmpfr */
+/* cc -o generate-triples-for-GB91 generate-triples-for-GB91.c -lmpfr # -fno-strict-aliasing */
 #include <stdint.h>
 #include <stdio.h>
 #include <time.h>
@@ -38,10 +38,11 @@
 static int find_triple_64(int i, int min_leeway, int perfect_leeway)
 {
         /*
-           Using mpfr is entirely overkill for this; [Lut95] includes
-           PASCAL fragments that use almost entirely integer
-           arithmetic, and the initial calculation doesn't need to
-           be so precise. No matter.
+           Using mpfr is not entirely overkill for this; [Lut95]
+           includes PASCAL fragments that use almost entirely integer
+           arithmetic... but the error term in that only handles
+           up to 13 extra bits of zeroes or so. We proudly boast
+           at least 16 bits of extra zeroes in all cases.
          */
         mpfr_t xi;
         mpfr_t xip1;
@@ -154,7 +155,8 @@
 
                 if (sgn < 0) {
                         r++;
-                } else if (r > (1 << 29) || xi_current_u > xip1_u) {
+                } else if (r > (1 << 29) ||
+                           xi_current_u > xip1_u) {
                         /*
                            This is taking too long, give up looking
                            for perfection and take the best we've
@@ -170,7 +172,7 @@
 
         if (best_l > min_leeway) {
                 printf(
-                        "[ .a = %#018lx, .c = %#018lx, .s = %#018lx ], /* i = %03d, l = %02d, r = %010ld, t = %ld */ \n",
+                        "(%#018lx, %#018lx, %#018lx), /* i = %03d, l = %02d, r = %010ld, t = %ld */ \n",
                         best_xi_u, best_cos_u, best_sin_u, i, best_l, best_r,
                         end -
                         start);
@@ -185,7 +187,18 @@
 {
         for (int i = 190; i <= N; ++i) {
                 if (find_triple_64(i, 11, 20) < 0) {
-                        printf("CANNOT FIND SUITABLE CANDIDATE FOR i = %03d\n", i);
+                        /*
+                           For some reason, i = 190 requires special
+                           handling; drop the range limitations on
+                           r and come back in a week.
+
+                           Other indices (4, 47, 74, &c) also benefit
+                           from this special handling, so the
+                           contents of sin-impl.myr might not exactly
+                           match the output of this particular file.
+                         */
+                        printf("CANNOT FIND SUITABLE CANDIDATE FOR i = %03d\n",
+                               i);
                 }
         }
 
--- /dev/null
+++ b/lib/math/ancillary/pi-constants.c
@@ -1,0 +1,80 @@
+/* cc -o pi-constants pi-constants.c -lmpfr */
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <mpfr.h>
+
+#define FLT64_TO_UINT64(f) (*((uint64_t *) ((char *) &f)))
+#define UINT64_TO_FLT64(u) (*((double *) ((char *) &u)))
+
+int main(void)
+{
+        mpfr_t pi;
+        mpfr_t two_pi;
+        mpfr_t t;
+        mpfr_t t2;
+        mpfr_t perfect_n;
+        double d = 0;
+        uint64_t u = 0;
+
+        mpfr_init2(pi, 10000);
+        mpfr_init2(two_pi, 10000);
+        mpfr_init2(t, 10000);
+        mpfr_init2(t2, 10000);
+        mpfr_init2(perfect_n, 10000);
+        mpfr_const_pi(pi, MPFR_RNDN);
+        mpfr_mul_si(two_pi, pi, 2, MPFR_RNDN);
+
+        for (long e = 25; e <= 1023; e += 50) {
+                mpfr_set_si(t, e, MPFR_RNDN);
+                mpfr_exp2(t, t, MPFR_RNDN);
+                mpfr_fmod(t2, t, two_pi, MPFR_RNDN);
+                mpfr_set(t, t2, MPFR_RNDN);
+                d = mpfr_get_d(t, MPFR_RNDN);
+                u = FLT64_TO_UINT64(d);
+                printf("(%#018lx, ", u);
+                mpfr_set_d(t2, d, MPFR_RNDN);
+                mpfr_sub(t, t, t2, MPFR_RNDN);
+                d = mpfr_get_d(t, MPFR_RNDN);
+                u = FLT64_TO_UINT64(d);
+                printf("%#018lx, ", u);
+                mpfr_set_d(t2, d, MPFR_RNDN);
+                mpfr_sub(t, t, t2, MPFR_RNDN);
+                d = mpfr_get_d(t, MPFR_RNDN);
+                u = FLT64_TO_UINT64(d);
+                printf("%#018lx), /* 2^%ld mod 2pi */\n", u, e);
+        }
+
+        printf("\n");
+        printf("1000 bits of pi/4:\n");
+
+        for (int bits_obtained = 0; bits_obtained < 1000; bits_obtained += 53) {
+                mpfr_div_si(pi, pi, 4, MPFR_RNDN);
+                d = mpfr_get_d(pi, MPFR_RNDN);
+                u = FLT64_TO_UINT64(d);
+                printf("%#018lx\n", u);
+                mpfr_set_d(t, d, MPFR_RNDN);
+                mpfr_sub(pi, pi, t, MPFR_RNDN);
+        }
+
+        printf("\n");
+        printf("Pre-computed 4/pi:\n");
+        mpfr_const_pi(pi, MPFR_RNDN);
+        mpfr_set_si(t, 4, MPFR_RNDN);
+        mpfr_div(pi, t, pi, MPFR_RNDN);
+        d = mpfr_get_d(pi, MPFR_RNDN);
+        u = FLT64_TO_UINT64(d);
+        printf("%#018lx\n", u);
+
+        printf("\n");
+        printf("Pre-computed 1/(pi/1024):\n");
+        mpfr_const_pi(pi, MPFR_RNDN);
+        mpfr_set_si(t, 1024, MPFR_RNDN);
+        mpfr_div(pi, t, pi, MPFR_RNDN);
+        d = mpfr_get_d(pi, MPFR_RNDN);
+        u = FLT64_TO_UINT64(d);
+        printf("%#018lx\n", u);
+
+        return 0;
+}