{-# LANGUAGE ExplicitForAll #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-}
-- |
-- Module      : Data.ByteString.Builder.RealFloat.TableGenerator
-- Copyright   : (c) Lawrence Wu 2021
-- License     : BSD-style
-- Maintainer  : lawrencejwu@gmail.com
--
-- Constants and overview for compile-time table generation for Ryu internals
--
-- This module uses Haskell's arbitrary-precision `Integer` types to compute
-- the necessary multipliers for efficient conversion to a decimal power base.
--
-- It also exposes constants relevant to the 32- and 64-bit tables (e.g maximum
-- number of bits required to store the table values).

module Data.ByteString.Builder.RealFloat.TableGenerator
  ( float_pow5_inv_bitcount
  , float_pow5_bitcount
  , double_pow5_bitcount
  , double_pow5_inv_bitcount
  , float_max_split
  , float_max_inv_split
  , double_max_split
  , double_max_inv_split
  ) where

import GHC.Float (int2Double)


-- The basic floating point conversion algorithm is as such:
--
-- Given floating point
--
--   f = (-1)^s * m_f * 2^e_f
--
-- which is IEEE encoded by `[s] [.. e ..] [.. m ..]`. `s` is the sign bit, `e`
-- is the biased exponent, and `m` is the mantissa, let
--
--       | e /= 0            | e == 0
--  -----+-------------------+-----------
--   m_f | 2^len(m) + m      | m
--   e_f | e - bias - len(m) | 1 - bias - len(m)
--
-- we compute the halfway points to the next smaller (`f-`) and larger (`f+`)
-- floating point numbers as
--
--  lower halfway point u * 2^e2, u = 4 * m_f - (if m == 0 then 1 else 2)
--                      v * 2^e2, v = 4 * m_f
--  upper halfway point w * 2^e2, u = 4 * m_f + 2
--  where e2 = ef - 2 (so u, v, w are integers)
--
--
-- Then we compute (a, b, c) * 10^e10 = (u, v, w) * 2^e2 which is split into
-- the case of
--
--   e2 >= 0   ==>    e10 = 0 , (a, b, c) = (u, v, w) * 2^e2
--   e2 <  0   ==>    e10 = e2, (a, b, c) = (u, v, w) * 5^-e2
--
-- And finally we find the shortest representation from integers d0 and e0 such
-- that
--
--  a * 10^e10 < d0 * 10^(e0+e10) < c * 10^e10
--
-- such that e0 is maximal (we allow equality to smaller or larger halfway
-- point depending on rounding mode). This is found through iteratively
-- dividing by 10 while a/10^j < c/10^j and doing some bookkeeping around
-- zeros.
--
--
--
--
-- The ryu algorithm removes the requirement for arbitrary precision arithmetic
-- and improves the runtime significantly by skipping most of the iterative
-- division by carefully selecting a point where certain invariants hold and
-- precomputing a few tables.
--
-- Specifically, define `q` such that the correspondings values of a/10^q <
-- c/10^q - 1. We can prove (not shown) that
--
--    if e2 >= 0, q = e2 * log_10(2)
--    if e2 <  0, q = -e2 * log_10(5)
--
-- Then we can compute (a, b, c) / 10^q. Starting from (u, v, w) we have
--
--      (a, b, c) / 10^q                  (a, b, c) / 10^q
--    = (u, v, w) * 2^e2 / 10^q    OR   = (u, v, w) * 5^-e2 / 10^q
--
-- And since q < e2,
--
--    = (u, v, w) * 2^e2-q / 5^q   OR   = (u, v, w) * 5^-e2-q / 2^q
--
-- While (u, v, w) are n-bit numbers, 5^q and whatnot are significantly larger,
-- but we only need the top-most n bits of the result so we can choose `k` that
-- reduce the number of bits required to ~2n. We then multiply by either
--
--    2^k / 5^q                    OR   5^-e2-q / 2^k
--
-- The required `k` is roughly linear in the exponent (we need more of the
-- multiplication to be precise) but the number of bits to store the
-- multiplicands above stays fixed.
--
-- Since the number of bits needed is relatively small for IEEE 32- and 64-bit
-- floating types, we can compute appropriate values for `k` for the
-- floating-point-type-specific bounds instead of each e2.
--
-- Finally, we need to do some final manual iterations potentially to do a
-- final fixup of the skipped state


-- | Bound for bits of @2^k / 5^q@ for floats
float_pow5_inv_bitcount :: Int
float_pow5_inv_bitcount :: Int
float_pow5_inv_bitcount = Int
59

-- | Bound for bits of @5^-e2-q / 2^k@ for floats
float_pow5_bitcount :: Int
float_pow5_bitcount :: Int
float_pow5_bitcount = Int
61

-- | Bound for bits of @5^-e2-q / 2^k@ for doubles
double_pow5_bitcount :: Int
double_pow5_bitcount :: Int
double_pow5_bitcount = Int
125

-- | Bound for bits of @2^k / 5^q@ for doubles
double_pow5_inv_bitcount :: Int
double_pow5_inv_bitcount :: Int
double_pow5_inv_bitcount = Int
125

-- NB: these tables are encoded directly into the source code in F2S and D2S
--
-- -- | Number of bits in a positive integer
-- blen :: Integer -> Int
-- blen 0 = 0
-- blen 1 = 1
-- blen n = 1 + blen (n `quot` 2)

-- -- | Used for table generation of 2^k / 5^q + 1
-- finv :: Int -> Int -> Integer
-- finv bitcount i =
--   let p = 5^i
--    in (1 `shiftL` (blen p - 1 + bitcount)) `div` p + 1

-- -- | Used for table generation of 5^-e2-q / 2^k
-- fnorm :: Int -> Int -> Integer
-- fnorm bitcount i =
--   let p = 5^i
--       s = blen p - bitcount
--    in if s < 0 then p `shiftL` (-s) else p `shiftR` s

-- -- | Generates a compile-time lookup table for floats as Word64
-- gen_table_f :: Int -> (Int -> Integer) -> Q Exp
-- gen_table_f n f = return $ ListE (fmap (LitE . IntegerL . f) [0..n])
--
-- -- | Generates a compile-time lookup table for doubles as Word128
-- gen_table_d :: Int -> (Int -> Integer) -> Q Exp
-- gen_table_d n f = return $ ListE (fmap ff [0..n])
--   where
--     ff :: Int -> Exp
--     ff c = let r = f c
--                hi = r `shiftR` 64
--                lo = r .&. ((1 `shiftL` 64) - 1)
--             in AppE (AppE (ConE 'Word128) (LitE . IntegerL $ hi)) (LitE . IntegerL $ lo)

-- Given a specific floating-point type, determine the range of q for the < 0
-- and >= 0 cases
get_range :: forall ff. (RealFloat ff) => ff -> (Int, Int)
get_range :: forall ff. RealFloat ff => ff -> (Int, Int)
get_range ff
f =
  let (Int
emin, Int
emax) = ff -> (Int, Int)
forall ff. RealFloat ff => ff -> (Int, Int)
floatRange ff
f
      mantissaDigits :: Int
mantissaDigits = ff -> Int
forall a. RealFloat a => a -> Int
floatDigits ff
f
      emin' :: Int
emin' = Int
emin Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
mantissaDigits Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
      emax' :: Int
emax' = Int
emax Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
mantissaDigits Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
   in ( (-Int
emin') Int -> Int -> Int
forall a. Num a => a -> a -> a
- Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
floor (Int -> Double
int2Double (-Int
emin') Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double -> Double
forall a. Floating a => a -> a -> a
logBase Double
10 Double
5)
      , Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
floor (Int -> Double
int2Double Int
emax' Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double -> Double
forall a. Floating a => a -> a -> a
logBase Double
10 Double
2))

float_max_split :: Int     -- = 46
float_max_inv_split :: Int -- = 30
(Int
float_max_split, Int
float_max_inv_split) = Float -> (Int, Int)
forall ff. RealFloat ff => ff -> (Int, Int)
get_range (Float
forall a. HasCallStack => a
undefined :: Float)

-- we take a slightly different codepath s.t we need one extra entry
double_max_split :: Int     -- = 325
double_max_inv_split :: Int -- = 291
(Int
double_max_split, Int
double_max_inv_split) =
    let (Int
m, Int
mi) = Double -> (Int, Int)
forall ff. RealFloat ff => ff -> (Int, Int)
get_range (Double
forall a. HasCallStack => a
undefined :: Double)
     in (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, Int
mi)