rem_pio2_large comments

This commit is contained in:
Andrey Zgarbul
2018-07-14 00:52:28 +03:00
parent 95138576c8
commit 52dba9843b

View File

@@ -1,117 +1,20 @@
use super::scalbn;
use super::floor;
/// double x[],y[]; int e0,nx,prec;
///
/// __rem_pio2_large return the last three digits of N with
/// y = x - N*pi/2
/// so that |y| < pi/2.
///
/// The method is to compute the integer (mod 8) and fraction parts of
/// (2/pi)*x without doing the full multiplication. In general we
/// skip the part of the product that are known to be a huge integer (
/// more accurately, = 0 mod 8 ). Thus the number of operations are
/// independent of the exponent of the input.
///
/// (2/pi) is represented by an array of 24-bit integers in ipio2[].
///
/// Input parameters:
/// x[] The input value (must be positive) is broken into nx
/// pieces of 24-bit integers in double precision format.
/// x[i] will be the i-th 24 bit of x. The scaled exponent
/// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
/// match x's up to 24 bits.
///
/// Example of breaking a double positive z into x[0]+x[1]+x[2]:
/// e0 = ilogb(z)-23
/// z = scalbn(z,-e0)
/// for i = 0,1,2
/// x[i] = floor(z)
/// z = (z-x[i])*2**24
///
/// y[] ouput result in an array of double precision numbers.
/// The dimension of y[] is:
/// 24-bit precision 1
/// 53-bit precision 2
/// 64-bit precision 2
/// 113-bit precision 3
/// The actual value is the sum of them. Thus for 113-bit
/// precison, one may have to do something like:
///
/// long double t,w,r_head, r_tail;
/// t = (long double)y[2] + (long double)y[1];
/// w = (long double)y[0];
/// r_head = t+w;
/// r_tail = w - (r_head - t);
///
/// e0 The exponent of x[0]. Must be <= 16360 or you need to
/// expand the ipio2 table.
///
/// prec an integer indicating the precision:
/// 0 24 bits (single)
/// 1 53 bits (double)
/// 2 64 bits (extended)
/// 3 113 bits (quad)
/// External function:
/// double scalbn(), floor();
///
///
/// Here is the description of some local variables:
///
/// jk jk+1 is the initial number of terms of ipio2[] needed
/// in the computation. The minimum and recommended value
/// for jk is 3,4,4,6 for single, double, extended, and quad.
/// jk+1 must be 2 larger than you might expect so that our
/// recomputation test works. (Up to 24 bits in the integer
/// part (the 24 bits of it that we compute) and 23 bits in
/// the fraction part may be lost to cancelation before we
/// recompute.)
///
/// jz local integer variable indicating the number of
/// terms of ipio2[] used.
///
/// jx nx - 1
///
/// jv index for pointing to the suitable ipio2[] for the
/// computation. In general, we want
/// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
/// is an integer. Thus
/// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
/// Hence jv = max(0,(e0-3)/24).
///
/// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
///
/// q[] double array with integral value, representing the
/// 24-bits chunk of the product of x and 2/pi.
///
/// q0 the corresponding exponent of q[0]. Note that the
/// exponent for q[i] would be q0-24*i.
///
/// PIo2[] double precision array, obtained by cutting pi/2
/// into 24 bits chunks.
///
/// f[] ipio2[] in floating point
///
/// iq[] integer array by breaking up q[] in 24-bits chunk.
///
/// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
///
/// ih integer. If >0 it indicates q[] is >= 0.5, hence
/// it also indicates the *sign* of the result.
// initial value for jk
const INIT_JK : [usize; 4] = [3,4,4,6];
const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */
/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
///
/// integer array, contains the (24*i)-th to (24*i+23)-th
/// bit of 2/pi after binary point. The corresponding
/// floating value is
///
/// ipio2[i] * 2^(-24(i+1)).
///
/// NB: This table must have at least (e0-3)/24 + jk terms.
/// For quad precision (e0 <= 16360, jk = 6), this is 686.
#[cfg(not(ldbl_max_exp_more1024))]
// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
//
// integer array, contains the (24*i)-th to (24*i+23)-th
// bit of 2/pi after binary point. The corresponding
// floating value is
//
// ipio2[i] * 2^(-24(i+1)).
//
// NB: This table must have at least (e0-3)/24 + jk terms.
// For quad precision (e0 <= 16360, jk = 6), this is 686.
#[cfg(target_pointer_width = "32")]
const IPIO2 : [i32; 66] = [
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
@@ -126,7 +29,7 @@ const IPIO2 : [i32; 66] = [
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
];
#[cfg(ldbl_max_exp_more1024)]
#[cfg(target_pointer_width = "64")]
const IPIO2 : [i32; 690] = [
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
@@ -256,6 +159,97 @@ const PIO2 : [f64; 8] = [
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
];
// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32
//
// Input parameters:
// x[] The input value (must be positive) is broken into nx
// pieces of 24-bit integers in double precision format.
// x[i] will be the i-th 24 bit of x. The scaled exponent
// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
// match x's up to 24 bits.
//
// Example of breaking a double positive z into x[0]+x[1]+x[2]:
// e0 = ilogb(z)-23
// z = scalbn(z,-e0)
// for i = 0,1,2
// x[i] = floor(z)
// z = (z-x[i])*2**24
//
// y[] ouput result in an array of double precision numbers.
// The dimension of y[] is:
// 24-bit precision 1
// 53-bit precision 2
// 64-bit precision 2
// 113-bit precision 3
// The actual value is the sum of them. Thus for 113-bit
// precison, one may have to do something like:
//
// long double t,w,r_head, r_tail;
// t = (long double)y[2] + (long double)y[1];
// w = (long double)y[0];
// r_head = t+w;
// r_tail = w - (r_head - t);
//
// e0 The exponent of x[0]. Must be <= 16360 or you need to
// expand the ipio2 table.
//
// prec an integer indicating the precision:
// 0 24 bits (single)
// 1 53 bits (double)
// 2 64 bits (extended)
// 3 113 bits (quad)
//
// Here is the description of some local variables:
//
// jk jk+1 is the initial number of terms of ipio2[] needed
// in the computation. The minimum and recommended value
// for jk is 3,4,4,6 for single, double, extended, and quad.
// jk+1 must be 2 larger than you might expect so that our
// recomputation test works. (Up to 24 bits in the integer
// part (the 24 bits of it that we compute) and 23 bits in
// the fraction part may be lost to cancelation before we
// recompute.)
//
// jz local integer variable indicating the number of
// terms of ipio2[] used.
//
// jx nx - 1
//
// jv index for pointing to the suitable ipio2[] for the
// computation. In general, we want
// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
// is an integer. Thus
// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
// Hence jv = max(0,(e0-3)/24).
//
// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
//
// q[] double array with integral value, representing the
// 24-bits chunk of the product of x and 2/pi.
//
// q0 the corresponding exponent of q[0]. Note that the
// exponent for q[i] would be q0-24*i.
//
// PIo2[] double precision array, obtained by cutting pi/2
// into 24 bits chunks.
//
// f[] ipio2[] in floating point
//
// iq[] integer array by breaking up q[] in 24-bits chunk.
//
// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
//
// ih integer. If >0 it indicates q[] is >= 0.5, hence
// it also indicates the *sign* of the result.
/// Return the last three digits of N with y = x - N*pi/2
/// so that |y| < pi/2.
///
/// The method is to compute the integer (mod 8) and fraction parts of
/// (2/pi)*x without doing the full multiplication. In general we
/// skip the part of the product that are known to be a huge integer (
/// more accurately, = 0 mod 8 ). Thus the number of operations are
/// independent of the exponent of the input.
#[inline]
pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 {
let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24