rem_pio2_large comments

2018-07-14 00:52:28 +03:00
parent 95138576c8
commit 52dba9843b
1 changed files with 105 additions and 111 deletions
--- a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
+++ b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
@@ -1,117 +1,20 @@
 use super::scalbn;
 use super::floor;
-/// double x[],y[]; int e0,nx,prec;
+// initial value for jk
-///
+const INIT_JK : [usize; 4] = [3,4,4,6];
 /// __rem_pio2_large return the last three digits of N with
 ///              y = x - N*pi/2
 /// so that |y| < pi/2.
 ///
 /// The method is to compute the integer (mod 8) and fraction parts of
 /// (2/pi)*x without doing the full multiplication. In general we
 /// skip the part of the product that are known to be a huge integer (
 /// more accurately, = 0 mod 8 ). Thus the number of operations are
 /// independent of the exponent of the input.
 ///
 /// (2/pi) is represented by an array of 24-bit integers in ipio2[].
 ///
 /// Input parameters:
 ///      x[]     The input value (must be positive) is broken into nx
 ///              pieces of 24-bit integers in double precision format.
 ///              x[i] will be the i-th 24 bit of x. The scaled exponent
 ///              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
 ///              match x's up to 24 bits.
 ///
 ///              Example of breaking a double positive z into x[0]+x[1]+x[2]:
 ///                      e0 = ilogb(z)-23
 ///                      z  = scalbn(z,-e0)
 ///              for i = 0,1,2
 ///                      x[i] = floor(z)
 ///                      z    = (z-x[i])*2**24
 ///
 ///      y[]     ouput result in an array of double precision numbers.
 ///              The dimension of y[] is:
 ///                      24-bit  precision       1
 ///                      53-bit  precision       2
 ///                      64-bit  precision       2
 ///                      113-bit precision       3
 ///              The actual value is the sum of them. Thus for 113-bit
 ///              precison, one may have to do something like:
 ///
 ///              long double t,w,r_head, r_tail;
 ///              t = (long double)y[2] + (long double)y[1];
 ///              w = (long double)y[0];
 ///              r_head = t+w;
 ///              r_tail = w - (r_head - t);
 ///
 ///      e0      The exponent of x[0]. Must be <= 16360 or you need to
 ///              expand the ipio2 table.
 ///
 ///      prec    an integer indicating the precision:
 ///                      0       24  bits (single)
 ///                      1       53  bits (double)
 ///                      2       64  bits (extended)
 ///                      3       113 bits (quad)
 /// External function:
 ///      double scalbn(), floor();
 ///
 ///
 /// Here is the description of some local variables:
 ///
 ///      jk      jk+1 is the initial number of terms of ipio2[] needed
 ///              in the computation. The minimum and recommended value
 ///              for jk is 3,4,4,6 for single, double, extended, and quad.
 ///              jk+1 must be 2 larger than you might expect so that our
 ///              recomputation test works. (Up to 24 bits in the integer
 ///              part (the 24 bits of it that we compute) and 23 bits in
 ///              the fraction part may be lost to cancelation before we
 ///              recompute.)
 ///
 ///      jz      local integer variable indicating the number of
 ///              terms of ipio2[] used.
 ///
 ///      jx      nx - 1
 ///
 ///      jv      index for pointing to the suitable ipio2[] for the
 ///              computation. In general, we want
 ///                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 ///              is an integer. Thus
 ///                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 ///              Hence jv = max(0,(e0-3)/24).
 ///
 ///      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
 ///
 ///      q[]     double array with integral value, representing the
 ///              24-bits chunk of the product of x and 2/pi.
 ///
 ///      q0      the corresponding exponent of q[0]. Note that the
 ///              exponent for q[i] would be q0-24*i.
 ///
 ///      PIo2[]  double precision array, obtained by cutting pi/2
 ///              into 24 bits chunks.
 ///
 ///      f[]     ipio2[] in floating point
 ///
 ///      iq[]    integer array by breaking up q[] in 24-bits chunk.
 ///
 ///      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
 ///
 ///      ih      integer. If >0 it indicates q[] is >= 0.5, hence
 ///              it also indicates the *sign* of the result.
-const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */
+// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
-
+//
-/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
+//              integer array, contains the (24*i)-th to (24*i+23)-th
-///
+//              bit of 2/pi after binary point. The corresponding
-///              integer array, contains the (24*i)-th to (24*i+23)-th
+//              floating value is
-///              bit of 2/pi after binary point. The corresponding
+//
-///              floating value is
+//                      ipio2[i] * 2^(-24(i+1)).
-///
+//
-///                      ipio2[i] * 2^(-24(i+1)).
+// NB: This table must have at least (e0-3)/24 + jk terms.
-///
+//     For quad precision (e0 <= 16360, jk = 6), this is 686.
-/// NB: This table must have at least (e0-3)/24 + jk terms.
+#[cfg(target_pointer_width = "32")]
 ///     For quad precision (e0 <= 16360, jk = 6), this is 686.
 #[cfg(not(ldbl_max_exp_more1024))]
 const IPIO2 : [i32; 66] = [
    0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
    0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
@@ -126,7 +29,7 @@ const IPIO2 : [i32; 66] = [
    0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 ];
-#[cfg(ldbl_max_exp_more1024)]
+#[cfg(target_pointer_width = "64")]
 const IPIO2 : [i32; 690] = [
    0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
    0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
@@ -256,6 +159,97 @@ const PIO2 : [f64; 8] = [
    2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
 ];
 // fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32
 //
 // Input parameters:
 //      x[]     The input value (must be positive) is broken into nx
 //              pieces of 24-bit integers in double precision format.
 //              x[i] will be the i-th 24 bit of x. The scaled exponent
 //              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
 //              match x's up to 24 bits.
 //
 //              Example of breaking a double positive z into x[0]+x[1]+x[2]:
 //                      e0 = ilogb(z)-23
 //                      z  = scalbn(z,-e0)
 //              for i = 0,1,2
 //                      x[i] = floor(z)
 //                      z    = (z-x[i])*2**24
 //
 //      y[]     ouput result in an array of double precision numbers.
 //              The dimension of y[] is:
 //                      24-bit  precision       1
 //                      53-bit  precision       2
 //                      64-bit  precision       2
 //                      113-bit precision       3
 //              The actual value is the sum of them. Thus for 113-bit
 //              precison, one may have to do something like:
 //
 //              long double t,w,r_head, r_tail;
 //              t = (long double)y[2] + (long double)y[1];
 //              w = (long double)y[0];
 //              r_head = t+w;
 //              r_tail = w - (r_head - t);
 //
 //      e0      The exponent of x[0]. Must be <= 16360 or you need to
 //              expand the ipio2 table.
 //
 //      prec    an integer indicating the precision:
 //                      0       24  bits (single)
 //                      1       53  bits (double)
 //                      2       64  bits (extended)
 //                      3       113 bits (quad)
 //
 // Here is the description of some local variables:
 //
 //      jk      jk+1 is the initial number of terms of ipio2[] needed
 //              in the computation. The minimum and recommended value
 //              for jk is 3,4,4,6 for single, double, extended, and quad.
 //              jk+1 must be 2 larger than you might expect so that our
 //              recomputation test works. (Up to 24 bits in the integer
 //              part (the 24 bits of it that we compute) and 23 bits in
 //              the fraction part may be lost to cancelation before we
 //              recompute.)
 //
 //      jz      local integer variable indicating the number of
 //              terms of ipio2[] used.
 //
 //      jx      nx - 1
 //
 //      jv      index for pointing to the suitable ipio2[] for the
 //              computation. In general, we want
 //                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 //              is an integer. Thus
 //                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 //              Hence jv = max(0,(e0-3)/24).
 //
 //      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
 //
 //      q[]     double array with integral value, representing the
 //              24-bits chunk of the product of x and 2/pi.
 //
 //      q0      the corresponding exponent of q[0]. Note that the
 //              exponent for q[i] would be q0-24*i.
 //
 //      PIo2[]  double precision array, obtained by cutting pi/2
 //              into 24 bits chunks.
 //
 //      f[]     ipio2[] in floating point
 //
 //      iq[]    integer array by breaking up q[] in 24-bits chunk.
 //
 //      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
 //
 //      ih      integer. If >0 it indicates q[] is >= 0.5, hence
 //              it also indicates the *sign* of the result.
 /// Return the last three digits of N with y = x - N*pi/2
 /// so that |y| < pi/2.
 ///
 /// The method is to compute the integer (mod 8) and fraction parts of
 /// (2/pi)*x without doing the full multiplication. In general we
 /// skip the part of the product that are known to be a huge integer (
 /// more accurately, = 0 mod 8 ). Thus the number of operations are
 /// independent of the exponent of the input.
 #[inline]
 pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 {
    let x1p24  = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24