rem_pio2_large comments
This commit is contained in:
@@ -1,117 +1,20 @@
|
|||||||
use super::scalbn;
|
use super::scalbn;
|
||||||
use super::floor;
|
use super::floor;
|
||||||
|
|
||||||
/// double x[],y[]; int e0,nx,prec;
|
// initial value for jk
|
||||||
///
|
const INIT_JK : [usize; 4] = [3,4,4,6];
|
||||||
/// __rem_pio2_large return the last three digits of N with
|
|
||||||
/// y = x - N*pi/2
|
|
||||||
/// so that |y| < pi/2.
|
|
||||||
///
|
|
||||||
/// The method is to compute the integer (mod 8) and fraction parts of
|
|
||||||
/// (2/pi)*x without doing the full multiplication. In general we
|
|
||||||
/// skip the part of the product that are known to be a huge integer (
|
|
||||||
/// more accurately, = 0 mod 8 ). Thus the number of operations are
|
|
||||||
/// independent of the exponent of the input.
|
|
||||||
///
|
|
||||||
/// (2/pi) is represented by an array of 24-bit integers in ipio2[].
|
|
||||||
///
|
|
||||||
/// Input parameters:
|
|
||||||
/// x[] The input value (must be positive) is broken into nx
|
|
||||||
/// pieces of 24-bit integers in double precision format.
|
|
||||||
/// x[i] will be the i-th 24 bit of x. The scaled exponent
|
|
||||||
/// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
|
|
||||||
/// match x's up to 24 bits.
|
|
||||||
///
|
|
||||||
/// Example of breaking a double positive z into x[0]+x[1]+x[2]:
|
|
||||||
/// e0 = ilogb(z)-23
|
|
||||||
/// z = scalbn(z,-e0)
|
|
||||||
/// for i = 0,1,2
|
|
||||||
/// x[i] = floor(z)
|
|
||||||
/// z = (z-x[i])*2**24
|
|
||||||
///
|
|
||||||
/// y[] ouput result in an array of double precision numbers.
|
|
||||||
/// The dimension of y[] is:
|
|
||||||
/// 24-bit precision 1
|
|
||||||
/// 53-bit precision 2
|
|
||||||
/// 64-bit precision 2
|
|
||||||
/// 113-bit precision 3
|
|
||||||
/// The actual value is the sum of them. Thus for 113-bit
|
|
||||||
/// precison, one may have to do something like:
|
|
||||||
///
|
|
||||||
/// long double t,w,r_head, r_tail;
|
|
||||||
/// t = (long double)y[2] + (long double)y[1];
|
|
||||||
/// w = (long double)y[0];
|
|
||||||
/// r_head = t+w;
|
|
||||||
/// r_tail = w - (r_head - t);
|
|
||||||
///
|
|
||||||
/// e0 The exponent of x[0]. Must be <= 16360 or you need to
|
|
||||||
/// expand the ipio2 table.
|
|
||||||
///
|
|
||||||
/// prec an integer indicating the precision:
|
|
||||||
/// 0 24 bits (single)
|
|
||||||
/// 1 53 bits (double)
|
|
||||||
/// 2 64 bits (extended)
|
|
||||||
/// 3 113 bits (quad)
|
|
||||||
/// External function:
|
|
||||||
/// double scalbn(), floor();
|
|
||||||
///
|
|
||||||
///
|
|
||||||
/// Here is the description of some local variables:
|
|
||||||
///
|
|
||||||
/// jk jk+1 is the initial number of terms of ipio2[] needed
|
|
||||||
/// in the computation. The minimum and recommended value
|
|
||||||
/// for jk is 3,4,4,6 for single, double, extended, and quad.
|
|
||||||
/// jk+1 must be 2 larger than you might expect so that our
|
|
||||||
/// recomputation test works. (Up to 24 bits in the integer
|
|
||||||
/// part (the 24 bits of it that we compute) and 23 bits in
|
|
||||||
/// the fraction part may be lost to cancelation before we
|
|
||||||
/// recompute.)
|
|
||||||
///
|
|
||||||
/// jz local integer variable indicating the number of
|
|
||||||
/// terms of ipio2[] used.
|
|
||||||
///
|
|
||||||
/// jx nx - 1
|
|
||||||
///
|
|
||||||
/// jv index for pointing to the suitable ipio2[] for the
|
|
||||||
/// computation. In general, we want
|
|
||||||
/// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
|
|
||||||
/// is an integer. Thus
|
|
||||||
/// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
|
|
||||||
/// Hence jv = max(0,(e0-3)/24).
|
|
||||||
///
|
|
||||||
/// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
|
|
||||||
///
|
|
||||||
/// q[] double array with integral value, representing the
|
|
||||||
/// 24-bits chunk of the product of x and 2/pi.
|
|
||||||
///
|
|
||||||
/// q0 the corresponding exponent of q[0]. Note that the
|
|
||||||
/// exponent for q[i] would be q0-24*i.
|
|
||||||
///
|
|
||||||
/// PIo2[] double precision array, obtained by cutting pi/2
|
|
||||||
/// into 24 bits chunks.
|
|
||||||
///
|
|
||||||
/// f[] ipio2[] in floating point
|
|
||||||
///
|
|
||||||
/// iq[] integer array by breaking up q[] in 24-bits chunk.
|
|
||||||
///
|
|
||||||
/// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
|
|
||||||
///
|
|
||||||
/// ih integer. If >0 it indicates q[] is >= 0.5, hence
|
|
||||||
/// it also indicates the *sign* of the result.
|
|
||||||
|
|
||||||
const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */
|
// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
|
||||||
|
//
|
||||||
/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
|
// integer array, contains the (24*i)-th to (24*i+23)-th
|
||||||
///
|
// bit of 2/pi after binary point. The corresponding
|
||||||
/// integer array, contains the (24*i)-th to (24*i+23)-th
|
// floating value is
|
||||||
/// bit of 2/pi after binary point. The corresponding
|
//
|
||||||
/// floating value is
|
// ipio2[i] * 2^(-24(i+1)).
|
||||||
///
|
//
|
||||||
/// ipio2[i] * 2^(-24(i+1)).
|
// NB: This table must have at least (e0-3)/24 + jk terms.
|
||||||
///
|
// For quad precision (e0 <= 16360, jk = 6), this is 686.
|
||||||
/// NB: This table must have at least (e0-3)/24 + jk terms.
|
#[cfg(target_pointer_width = "32")]
|
||||||
/// For quad precision (e0 <= 16360, jk = 6), this is 686.
|
|
||||||
#[cfg(not(ldbl_max_exp_more1024))]
|
|
||||||
const IPIO2 : [i32; 66] = [
|
const IPIO2 : [i32; 66] = [
|
||||||
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
||||||
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
||||||
@@ -126,7 +29,7 @@ const IPIO2 : [i32; 66] = [
|
|||||||
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
|
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
|
||||||
];
|
];
|
||||||
|
|
||||||
#[cfg(ldbl_max_exp_more1024)]
|
#[cfg(target_pointer_width = "64")]
|
||||||
const IPIO2 : [i32; 690] = [
|
const IPIO2 : [i32; 690] = [
|
||||||
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
||||||
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
||||||
@@ -256,6 +159,97 @@ const PIO2 : [f64; 8] = [
|
|||||||
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
|
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32
|
||||||
|
//
|
||||||
|
// Input parameters:
|
||||||
|
// x[] The input value (must be positive) is broken into nx
|
||||||
|
// pieces of 24-bit integers in double precision format.
|
||||||
|
// x[i] will be the i-th 24 bit of x. The scaled exponent
|
||||||
|
// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
|
||||||
|
// match x's up to 24 bits.
|
||||||
|
//
|
||||||
|
// Example of breaking a double positive z into x[0]+x[1]+x[2]:
|
||||||
|
// e0 = ilogb(z)-23
|
||||||
|
// z = scalbn(z,-e0)
|
||||||
|
// for i = 0,1,2
|
||||||
|
// x[i] = floor(z)
|
||||||
|
// z = (z-x[i])*2**24
|
||||||
|
//
|
||||||
|
// y[] ouput result in an array of double precision numbers.
|
||||||
|
// The dimension of y[] is:
|
||||||
|
// 24-bit precision 1
|
||||||
|
// 53-bit precision 2
|
||||||
|
// 64-bit precision 2
|
||||||
|
// 113-bit precision 3
|
||||||
|
// The actual value is the sum of them. Thus for 113-bit
|
||||||
|
// precison, one may have to do something like:
|
||||||
|
//
|
||||||
|
// long double t,w,r_head, r_tail;
|
||||||
|
// t = (long double)y[2] + (long double)y[1];
|
||||||
|
// w = (long double)y[0];
|
||||||
|
// r_head = t+w;
|
||||||
|
// r_tail = w - (r_head - t);
|
||||||
|
//
|
||||||
|
// e0 The exponent of x[0]. Must be <= 16360 or you need to
|
||||||
|
// expand the ipio2 table.
|
||||||
|
//
|
||||||
|
// prec an integer indicating the precision:
|
||||||
|
// 0 24 bits (single)
|
||||||
|
// 1 53 bits (double)
|
||||||
|
// 2 64 bits (extended)
|
||||||
|
// 3 113 bits (quad)
|
||||||
|
//
|
||||||
|
// Here is the description of some local variables:
|
||||||
|
//
|
||||||
|
// jk jk+1 is the initial number of terms of ipio2[] needed
|
||||||
|
// in the computation. The minimum and recommended value
|
||||||
|
// for jk is 3,4,4,6 for single, double, extended, and quad.
|
||||||
|
// jk+1 must be 2 larger than you might expect so that our
|
||||||
|
// recomputation test works. (Up to 24 bits in the integer
|
||||||
|
// part (the 24 bits of it that we compute) and 23 bits in
|
||||||
|
// the fraction part may be lost to cancelation before we
|
||||||
|
// recompute.)
|
||||||
|
//
|
||||||
|
// jz local integer variable indicating the number of
|
||||||
|
// terms of ipio2[] used.
|
||||||
|
//
|
||||||
|
// jx nx - 1
|
||||||
|
//
|
||||||
|
// jv index for pointing to the suitable ipio2[] for the
|
||||||
|
// computation. In general, we want
|
||||||
|
// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
|
||||||
|
// is an integer. Thus
|
||||||
|
// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
|
||||||
|
// Hence jv = max(0,(e0-3)/24).
|
||||||
|
//
|
||||||
|
// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
|
||||||
|
//
|
||||||
|
// q[] double array with integral value, representing the
|
||||||
|
// 24-bits chunk of the product of x and 2/pi.
|
||||||
|
//
|
||||||
|
// q0 the corresponding exponent of q[0]. Note that the
|
||||||
|
// exponent for q[i] would be q0-24*i.
|
||||||
|
//
|
||||||
|
// PIo2[] double precision array, obtained by cutting pi/2
|
||||||
|
// into 24 bits chunks.
|
||||||
|
//
|
||||||
|
// f[] ipio2[] in floating point
|
||||||
|
//
|
||||||
|
// iq[] integer array by breaking up q[] in 24-bits chunk.
|
||||||
|
//
|
||||||
|
// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
|
||||||
|
//
|
||||||
|
// ih integer. If >0 it indicates q[] is >= 0.5, hence
|
||||||
|
// it also indicates the *sign* of the result.
|
||||||
|
|
||||||
|
/// Return the last three digits of N with y = x - N*pi/2
|
||||||
|
/// so that |y| < pi/2.
|
||||||
|
///
|
||||||
|
/// The method is to compute the integer (mod 8) and fraction parts of
|
||||||
|
/// (2/pi)*x without doing the full multiplication. In general we
|
||||||
|
/// skip the part of the product that are known to be a huge integer (
|
||||||
|
/// more accurately, = 0 mod 8 ). Thus the number of operations are
|
||||||
|
/// independent of the exponent of the input.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 {
|
pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 {
|
||||||
let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
|
let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
|
||||||
|
|||||||
Reference in New Issue
Block a user