dec2flt: Update documentation of existing methods

Fix or elaborate existing float parsing documentation. This includes
introducing a convention that should make naming more consistent.
This commit is contained in:
Trevor Gross
2024-12-09 09:25:22 +00:00
parent a0ed304c21
commit 5a2da96a44
3 changed files with 35 additions and 17 deletions

View File

@@ -8,12 +8,12 @@ pub(crate) trait ByteSlice {
/// Writes a 64-bit integer as 8 bytes in little-endian order. /// Writes a 64-bit integer as 8 bytes in little-endian order.
fn write_u64(&mut self, value: u64); fn write_u64(&mut self, value: u64);
/// Calculate the offset of a slice from another. /// Calculate the difference in length between two slices.
fn offset_from(&self, other: &Self) -> isize; fn offset_from(&self, other: &Self) -> isize;
/// Iteratively parse and consume digits from bytes. /// Iteratively parse and consume digits from bytes.
/// Returns the same bytes with consumed digits being ///
/// elided. /// Returns the same bytes with consumed digits being elided. Breaks on invalid digits.
fn parse_digits(&self, func: impl FnMut(u8)) -> &Self; fn parse_digits(&self, func: impl FnMut(u8)) -> &Self;
} }
@@ -39,11 +39,11 @@ impl ByteSlice for [u8] {
fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self { fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self {
let mut s = self; let mut s = self;
while let Some((c, s_next)) = s.split_first() { while let Some((c, rest)) = s.split_first() {
let c = c.wrapping_sub(b'0'); let c = c.wrapping_sub(b'0');
if c < 10 { if c < 10 {
func(c); func(c);
s = s_next; s = rest;
} else { } else {
break; break;
} }
@@ -53,7 +53,9 @@ impl ByteSlice for [u8] {
} }
} }
/// Determine if 8 bytes are all decimal digits. /// Determine if all characters in an 8-byte byte string (represented as a `u64`) are decimal
/// digits.
///
/// This does not care about the order in which the bytes were loaded. /// This does not care about the order in which the bytes were loaded.
pub(crate) fn is_8digits(v: u64) -> bool { pub(crate) fn is_8digits(v: u64) -> bool {
let a = v.wrapping_add(0x4646_4646_4646_4646); let a = v.wrapping_add(0x4646_4646_4646_4646);

View File

@@ -1,4 +1,4 @@
//! Arbitrary-precision decimal class for fallback algorithms. //! Arbitrary-precision decimal type used by fallback algorithms.
//! //!
//! This is only used if the fast-path (native floats) and //! This is only used if the fast-path (native floats) and
//! the Eisel-Lemire algorithm are unable to unambiguously //! the Eisel-Lemire algorithm are unable to unambiguously
@@ -11,6 +11,7 @@
use crate::num::dec2flt::common::{ByteSlice, is_8digits}; use crate::num::dec2flt::common::{ByteSlice, is_8digits};
/// A decimal floating-point number.
#[derive(Clone)] #[derive(Clone)]
pub(super) struct Decimal { pub(super) struct Decimal {
/// The number of significant digits in the decimal. /// The number of significant digits in the decimal.
@@ -30,18 +31,17 @@ impl Default for Decimal {
} }
impl Decimal { impl Decimal {
/// The maximum number of digits required to unambiguously round a float. /// The maximum number of digits required to unambiguously round up to a 64-bit float.
/// ///
/// For a double-precision IEEE 754 float, this required 767 digits, /// For an IEEE 754 binary64 float, this requires 767 digits. So we store the max digits + 1.
/// so we store the max digits + 1.
/// ///
/// We can exactly represent a float in radix `b` from radix 2 if /// We can exactly represent a float in radix `b` from radix 2 if
/// `b` is divisible by 2. This function calculates the exact number of /// `b` is divisible by 2. This function calculates the exact number of
/// digits required to exactly represent that float. /// digits required to exactly represent that float.
/// ///
/// According to the "Handbook of Floating Point Arithmetic", /// According to the "Handbook of Floating Point Arithmetic",
/// for IEEE754, with emin being the min exponent, p2 being the /// for IEEE754, with `emin` being the min exponent, `p2` being the
/// precision, and b being the radix, the number of digits follows as: /// precision, and `b` being the radix, the number of digits follows as:
/// ///
/// `-emin + p2 + ⌊(emin + 1) log(2, b) - log(1 - 2^(-p2), b)⌋` /// `-emin + p2 + ⌊(emin + 1) log(2, b) - log(1 - 2^(-p2), b)⌋`
/// ///
@@ -56,11 +56,14 @@ impl Decimal {
/// In Python: /// In Python:
/// `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))` /// `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))`
pub(super) const MAX_DIGITS: usize = 768; pub(super) const MAX_DIGITS: usize = 768;
/// The max digits that can be exactly represented in a 64-bit integer. /// The max decimal digits that can be exactly represented in a 64-bit integer.
pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19; pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19;
pub(super) const DECIMAL_POINT_RANGE: i32 = 2047; pub(super) const DECIMAL_POINT_RANGE: i32 = 2047;
/// Append a digit to the buffer. /// Append a digit to the buffer if it fits.
// FIXME(tgross35): it may be better for this to return an option
// FIXME(tgross35): incrementing the digit counter even if we don't push anything
// seems incorrect.
pub(super) fn try_add_digit(&mut self, digit: u8) { pub(super) fn try_add_digit(&mut self, digit: u8) {
if self.num_digits < Self::MAX_DIGITS { if self.num_digits < Self::MAX_DIGITS {
self.digits[self.num_digits] = digit; self.digits[self.num_digits] = digit;
@@ -69,6 +72,7 @@ impl Decimal {
} }
/// Trim trailing zeros from the buffer. /// Trim trailing zeros from the buffer.
// FIXME(tgross35): this could be `.rev().position()` if perf is okay
pub(super) fn trim(&mut self) { pub(super) fn trim(&mut self) {
// All of the following calls to `Decimal::trim` can't panic because: // All of the following calls to `Decimal::trim` can't panic because:
// //
@@ -86,7 +90,7 @@ impl Decimal {
pub(super) fn round(&self) -> u64 { pub(super) fn round(&self) -> u64 {
if self.num_digits == 0 || self.decimal_point < 0 { if self.num_digits == 0 || self.decimal_point < 0 {
return 0; return 0;
} else if self.decimal_point > 18 { } else if self.decimal_point >= Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32 {
return 0xFFFF_FFFF_FFFF_FFFF_u64; return 0xFFFF_FFFF_FFFF_FFFF_u64;
} }
let dp = self.decimal_point as usize; let dp = self.decimal_point as usize;

View File

@@ -3,8 +3,8 @@
//! # Problem statement //! # Problem statement
//! //!
//! We are given a decimal string such as `12.34e56`. This string consists of integral (`12`), //! We are given a decimal string such as `12.34e56`. This string consists of integral (`12`),
//! fractional (`34`), and exponent (`56`) parts. All parts are optional and interpreted as zero //! fractional (`34`), and exponent (`56`) parts. All parts are optional and interpreted as a
//! when missing. //! default value (1 or 0) when missing.
//! //!
//! We seek the IEEE 754 floating point number that is closest to the exact value of the decimal //! We seek the IEEE 754 floating point number that is closest to the exact value of the decimal
//! string. It is well-known that many decimal strings do not have terminating representations in //! string. It is well-known that many decimal strings do not have terminating representations in
@@ -67,6 +67,18 @@
//! "such that the exponent +/- the number of decimal digits fits into a 64 bit integer". //! "such that the exponent +/- the number of decimal digits fits into a 64 bit integer".
//! Larger exponents are accepted, but we don't do arithmetic with them, they are immediately //! Larger exponents are accepted, but we don't do arithmetic with them, they are immediately
//! turned into {positive,negative} {zero,infinity}. //! turned into {positive,negative} {zero,infinity}.
//!
//! # Notation
//!
//! This module uses the same notation as the Lemire paper:
//!
//! - `m`: binary mantissa; always nonnegative
//! - `p`: binary exponent; a signed integer
//! - `w`: decimal significand; always nonnegative
//! - `q`: decimal exponent; a signed integer
//!
//! This gives `m * 2^p` for the binary floating-point number, with `w * 10^q` as the decimal
//! equivalent.
#![doc(hidden)] #![doc(hidden)]
#![unstable( #![unstable(