rem_pio2_large comments
This commit is contained in:
@@ -1,117 +1,20 @@
|
||||
use super::scalbn;
|
||||
use super::floor;
|
||||
|
||||
/// double x[],y[]; int e0,nx,prec;
|
||||
///
|
||||
/// __rem_pio2_large return the last three digits of N with
|
||||
/// y = x - N*pi/2
|
||||
/// so that |y| < pi/2.
|
||||
///
|
||||
/// The method is to compute the integer (mod 8) and fraction parts of
|
||||
/// (2/pi)*x without doing the full multiplication. In general we
|
||||
/// skip the part of the product that are known to be a huge integer (
|
||||
/// more accurately, = 0 mod 8 ). Thus the number of operations are
|
||||
/// independent of the exponent of the input.
|
||||
///
|
||||
/// (2/pi) is represented by an array of 24-bit integers in ipio2[].
|
||||
///
|
||||
/// Input parameters:
|
||||
/// x[] The input value (must be positive) is broken into nx
|
||||
/// pieces of 24-bit integers in double precision format.
|
||||
/// x[i] will be the i-th 24 bit of x. The scaled exponent
|
||||
/// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
|
||||
/// match x's up to 24 bits.
|
||||
///
|
||||
/// Example of breaking a double positive z into x[0]+x[1]+x[2]:
|
||||
/// e0 = ilogb(z)-23
|
||||
/// z = scalbn(z,-e0)
|
||||
/// for i = 0,1,2
|
||||
/// x[i] = floor(z)
|
||||
/// z = (z-x[i])*2**24
|
||||
///
|
||||
/// y[] ouput result in an array of double precision numbers.
|
||||
/// The dimension of y[] is:
|
||||
/// 24-bit precision 1
|
||||
/// 53-bit precision 2
|
||||
/// 64-bit precision 2
|
||||
/// 113-bit precision 3
|
||||
/// The actual value is the sum of them. Thus for 113-bit
|
||||
/// precison, one may have to do something like:
|
||||
///
|
||||
/// long double t,w,r_head, r_tail;
|
||||
/// t = (long double)y[2] + (long double)y[1];
|
||||
/// w = (long double)y[0];
|
||||
/// r_head = t+w;
|
||||
/// r_tail = w - (r_head - t);
|
||||
///
|
||||
/// e0 The exponent of x[0]. Must be <= 16360 or you need to
|
||||
/// expand the ipio2 table.
|
||||
///
|
||||
/// prec an integer indicating the precision:
|
||||
/// 0 24 bits (single)
|
||||
/// 1 53 bits (double)
|
||||
/// 2 64 bits (extended)
|
||||
/// 3 113 bits (quad)
|
||||
/// External function:
|
||||
/// double scalbn(), floor();
|
||||
///
|
||||
///
|
||||
/// Here is the description of some local variables:
|
||||
///
|
||||
/// jk jk+1 is the initial number of terms of ipio2[] needed
|
||||
/// in the computation. The minimum and recommended value
|
||||
/// for jk is 3,4,4,6 for single, double, extended, and quad.
|
||||
/// jk+1 must be 2 larger than you might expect so that our
|
||||
/// recomputation test works. (Up to 24 bits in the integer
|
||||
/// part (the 24 bits of it that we compute) and 23 bits in
|
||||
/// the fraction part may be lost to cancelation before we
|
||||
/// recompute.)
|
||||
///
|
||||
/// jz local integer variable indicating the number of
|
||||
/// terms of ipio2[] used.
|
||||
///
|
||||
/// jx nx - 1
|
||||
///
|
||||
/// jv index for pointing to the suitable ipio2[] for the
|
||||
/// computation. In general, we want
|
||||
/// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
|
||||
/// is an integer. Thus
|
||||
/// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
|
||||
/// Hence jv = max(0,(e0-3)/24).
|
||||
///
|
||||
/// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
|
||||
///
|
||||
/// q[] double array with integral value, representing the
|
||||
/// 24-bits chunk of the product of x and 2/pi.
|
||||
///
|
||||
/// q0 the corresponding exponent of q[0]. Note that the
|
||||
/// exponent for q[i] would be q0-24*i.
|
||||
///
|
||||
/// PIo2[] double precision array, obtained by cutting pi/2
|
||||
/// into 24 bits chunks.
|
||||
///
|
||||
/// f[] ipio2[] in floating point
|
||||
///
|
||||
/// iq[] integer array by breaking up q[] in 24-bits chunk.
|
||||
///
|
||||
/// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
|
||||
///
|
||||
/// ih integer. If >0 it indicates q[] is >= 0.5, hence
|
||||
/// it also indicates the *sign* of the result.
|
||||
// initial value for jk
|
||||
const INIT_JK : [usize; 4] = [3,4,4,6];
|
||||
|
||||
const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */
|
||||
|
||||
/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
|
||||
///
|
||||
/// integer array, contains the (24*i)-th to (24*i+23)-th
|
||||
/// bit of 2/pi after binary point. The corresponding
|
||||
/// floating value is
|
||||
///
|
||||
/// ipio2[i] * 2^(-24(i+1)).
|
||||
///
|
||||
/// NB: This table must have at least (e0-3)/24 + jk terms.
|
||||
/// For quad precision (e0 <= 16360, jk = 6), this is 686.
|
||||
#[cfg(not(ldbl_max_exp_more1024))]
|
||||
// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
|
||||
//
|
||||
// integer array, contains the (24*i)-th to (24*i+23)-th
|
||||
// bit of 2/pi after binary point. The corresponding
|
||||
// floating value is
|
||||
//
|
||||
// ipio2[i] * 2^(-24(i+1)).
|
||||
//
|
||||
// NB: This table must have at least (e0-3)/24 + jk terms.
|
||||
// For quad precision (e0 <= 16360, jk = 6), this is 686.
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
const IPIO2 : [i32; 66] = [
|
||||
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
||||
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
||||
@@ -126,7 +29,7 @@ const IPIO2 : [i32; 66] = [
|
||||
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
|
||||
];
|
||||
|
||||
#[cfg(ldbl_max_exp_more1024)]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
const IPIO2 : [i32; 690] = [
|
||||
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
||||
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
||||
@@ -256,6 +159,97 @@ const PIO2 : [f64; 8] = [
|
||||
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
|
||||
];
|
||||
|
||||
// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32
|
||||
//
|
||||
// Input parameters:
|
||||
// x[] The input value (must be positive) is broken into nx
|
||||
// pieces of 24-bit integers in double precision format.
|
||||
// x[i] will be the i-th 24 bit of x. The scaled exponent
|
||||
// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
|
||||
// match x's up to 24 bits.
|
||||
//
|
||||
// Example of breaking a double positive z into x[0]+x[1]+x[2]:
|
||||
// e0 = ilogb(z)-23
|
||||
// z = scalbn(z,-e0)
|
||||
// for i = 0,1,2
|
||||
// x[i] = floor(z)
|
||||
// z = (z-x[i])*2**24
|
||||
//
|
||||
// y[] ouput result in an array of double precision numbers.
|
||||
// The dimension of y[] is:
|
||||
// 24-bit precision 1
|
||||
// 53-bit precision 2
|
||||
// 64-bit precision 2
|
||||
// 113-bit precision 3
|
||||
// The actual value is the sum of them. Thus for 113-bit
|
||||
// precison, one may have to do something like:
|
||||
//
|
||||
// long double t,w,r_head, r_tail;
|
||||
// t = (long double)y[2] + (long double)y[1];
|
||||
// w = (long double)y[0];
|
||||
// r_head = t+w;
|
||||
// r_tail = w - (r_head - t);
|
||||
//
|
||||
// e0 The exponent of x[0]. Must be <= 16360 or you need to
|
||||
// expand the ipio2 table.
|
||||
//
|
||||
// prec an integer indicating the precision:
|
||||
// 0 24 bits (single)
|
||||
// 1 53 bits (double)
|
||||
// 2 64 bits (extended)
|
||||
// 3 113 bits (quad)
|
||||
//
|
||||
// Here is the description of some local variables:
|
||||
//
|
||||
// jk jk+1 is the initial number of terms of ipio2[] needed
|
||||
// in the computation. The minimum and recommended value
|
||||
// for jk is 3,4,4,6 for single, double, extended, and quad.
|
||||
// jk+1 must be 2 larger than you might expect so that our
|
||||
// recomputation test works. (Up to 24 bits in the integer
|
||||
// part (the 24 bits of it that we compute) and 23 bits in
|
||||
// the fraction part may be lost to cancelation before we
|
||||
// recompute.)
|
||||
//
|
||||
// jz local integer variable indicating the number of
|
||||
// terms of ipio2[] used.
|
||||
//
|
||||
// jx nx - 1
|
||||
//
|
||||
// jv index for pointing to the suitable ipio2[] for the
|
||||
// computation. In general, we want
|
||||
// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
|
||||
// is an integer. Thus
|
||||
// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
|
||||
// Hence jv = max(0,(e0-3)/24).
|
||||
//
|
||||
// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
|
||||
//
|
||||
// q[] double array with integral value, representing the
|
||||
// 24-bits chunk of the product of x and 2/pi.
|
||||
//
|
||||
// q0 the corresponding exponent of q[0]. Note that the
|
||||
// exponent for q[i] would be q0-24*i.
|
||||
//
|
||||
// PIo2[] double precision array, obtained by cutting pi/2
|
||||
// into 24 bits chunks.
|
||||
//
|
||||
// f[] ipio2[] in floating point
|
||||
//
|
||||
// iq[] integer array by breaking up q[] in 24-bits chunk.
|
||||
//
|
||||
// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
|
||||
//
|
||||
// ih integer. If >0 it indicates q[] is >= 0.5, hence
|
||||
// it also indicates the *sign* of the result.
|
||||
|
||||
/// Return the last three digits of N with y = x - N*pi/2
|
||||
/// so that |y| < pi/2.
|
||||
///
|
||||
/// The method is to compute the integer (mod 8) and fraction parts of
|
||||
/// (2/pi)*x without doing the full multiplication. In general we
|
||||
/// skip the part of the product that are known to be a huge integer (
|
||||
/// more accurately, = 0 mod 8 ). Thus the number of operations are
|
||||
/// independent of the exponent of the input.
|
||||
#[inline]
|
||||
pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 {
|
||||
let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
|
||||
|
||||
Reference in New Issue
Block a user