Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a Parsers.jl-based parser implementation #80

Merged
merged 16 commits into from
Jun 14, 2023
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.0'
- '1.6'
- '1'
# - 'nightly'
os:
Expand Down
4 changes: 4 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ uuid = "fb4d412d-6eee-574d-9565-ede6634db7b0"
authors = ["Fengyang Wang <fengyang.wang.0@gmail.com>", "Curtis Vogt <curtis.vogt@gmail.com>"]
version = "0.4.2"

[deps]
Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"

[compat]
Parsers = "2.7"
julia = "1.6"

[extras]
Expand Down
84 changes: 12 additions & 72 deletions src/FixedPointDecimals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ module FixedPointDecimals
export FixedDecimal, RoundThrows

using Base: decompose, BitInteger
import Parsers

# floats that support fma and are roughly IEEE-like
const FMAFloat = Union{Float16, Float32, Float64, BigFloat}
Expand Down Expand Up @@ -99,6 +100,17 @@ end
end

const FD = FixedDecimal
const RoundThrows = RoundingMode{:Throw}()
Drvi marked this conversation as resolved.
Show resolved Hide resolved

include("parse.jl")

# Module initialiser: (re)size the BigInt scratch buffers used while parsing, giving each
# buffer one slot per thread id. See `_divpow10!` in parse.jl for where they are used.
function __init__()
    # `Threads.maxthreadid` does not exist on every supported Julia version; fall back to
    # `Threads.nthreads` when it is missing.
    nslots = if isdefined(Base.Threads, :maxthreadid)
        Threads.maxthreadid()
    else
        Threads.nthreads()
    end
    # Discard any previous contents, then grow each buffer to `nslots` entries.
    for buffer in (_BIGINT_10s, _BIGINT_Rs)
        empty!(buffer)
        resize!(buffer, nslots)
    end
    return
end

# Calling a `FixedDecimal` type on a `Real` value is defined as `convert`ing to that type.
function (::Type{T})(x::Real) where {T <: FD}
    return convert(T, x)
end

Expand Down Expand Up @@ -413,78 +425,6 @@ function Base.show(io::IO, x::FD{T, f}) where {T, f}
end
end

# parsing

"""
RoundThrows

Raises an `InexactError` if any rounding is necessary.
"""
const RoundThrows = RoundingMode{:Throw}()
Drvi marked this conversation as resolved.
Show resolved Hide resolved

# Parse `str` as a `FD{T, f}` by slicing the digit string directly.
#
# `mode` must be one of `RoundThrows`, `RoundNearest` or `RoundToZero`; any other rounding
# mode raises an `ArgumentError`. With `RoundThrows` an `InexactError` is thrown when
# non-zero digits fall past the representable precision; with `RoundToZero` those digits
# are dropped; with `RoundNearest` the decision is delegated to `parse_round`.
# Returns the parsed integer reinterpreted as `FD{T, f}`.
# NOTE(review): only a lowercase 'e' exponent marker and a leading '-' sign are recognised
# here, and `first(str)` is called without an emptiness check — confirm that is intended.
function Base.parse(::Type{FD{T, f}}, str::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
if !(mode in (RoundThrows, RoundNearest, RoundToZero))
throw(ArgumentError("Unhandled rounding mode $mode"))
end

# Parse exponent information
# `sig_end` is the index of the last character of the significand (everything before 'e')
exp_index = something(findfirst(==('e'), str), 0)
if exp_index > 0
exp = parse(Int, str[(exp_index + 1):end])
sig_end = exp_index - 1
else
exp = 0
sig_end = lastindex(str)
end

# Remove the decimal place from the string
# Dropping the '.' is compensated by lowering `exp` by the number of fractional digits
sign = T(first(str) == '-' ? -1 : 1)
dec_index = something(findfirst(==('.'), str), 0)
sig_start = sign < 0 ? 2 : 1
if dec_index > 0
int_str = str[sig_start:(dec_index - 1)] * str[(dec_index + 1):sig_end]
exp -= sig_end - dec_index
else
int_str = str[sig_start:sig_end]
end

# Split the integer string into the value we can represent inside the FixedDecimal and
# the remaining digits we'll use during rounding
# `pivot` is the number of leading digits of `int_str` that make up the stored integer
int_end = lastindex(int_str)
pivot = int_end + exp - (-f)

# `a`: the kept digits, right-padded with zeros up to `pivot` characters
# `b`: the last kept digit (or a '0' pad) followed by the digits that do not fit
a = rpad(int_str[1:min(pivot, int_end)], pivot, '0')
b = lpad(int_str[max(pivot, 1):int_end], int_end - pivot + 1, '0')

# Parse the strings
val = isempty(a) ? T(0) : sign * parse(T, a)
# Only the characters after the first one in `b` are discarded digits; rounding matters
# only when at least one of them is non-zero
if !isempty(b) && any(!isequal('0'), b[2:end])
if mode == RoundThrows
throw(InexactError(:parse, FD{T, f}, str))
elseif mode == RoundNearest
val += sign * parse_round(T, b, mode)
end
end

reinterpret(FD{T, f}, val)
end

# Decide whether the digits in `fractional` round the kept value up by one unit.
#
# `fractional` consists of the last kept digit followed by the digits being discarded
# (e.g. "051" means "kept digit 0, discarded .51"). Returns `T(1)` when the discarded part
# rounds up under round-half-to-even, otherwise `T(0)`.
#
# The decision is made once over the whole discarded tail. Rounding digit by digit from
# the right (as was done previously) double-rounds: "051" (0.51) yielded 0 and
# "1455" (1.455) yielded 1.
function parse_round(::Type{T}, fractional::AbstractString, ::RoundingMode{:Nearest}) where T
    # Note: parsing each digit individually ensures we don't run into an OverflowError
    digits = Int8[parse(Int8, d) for d in fractional]
    # Nothing is being discarded, so there is nothing to round
    length(digits) < 2 && return T(0)

    lead = digits[2]  # the most significant discarded digit
    if lead != 5
        # Strictly above or below one half: the remaining digits cannot change the outcome
        return lead > 5 ? T(1) : T(0)
    end

    # The discarded part starts with 5: it is above one half iff any later digit is
    # non-zero; an exact tie goes to the even neighbour, i.e. rounds up when the kept
    # digit is odd.
    above_half = any(!iszero, @view digits[3:end])
    return (above_half || isodd(digits[1])) ? T(1) : T(0)
end


"""
max_exp10(T)

Expand Down
227 changes: 227 additions & 0 deletions src/parse.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
using Parsers
using Parsers: AbstractConf, SourceType, XOPTIONS, Result
Drvi marked this conversation as resolved.
Show resolved Hide resolved

"""
RoundThrows

Raises an `InexactError` if any rounding is necessary.
"""
const RoundThrows = RoundingMode{:Throw}()

"""
    FixedDecimalConf{T<:Integer} <: Parsers.AbstractConf{T}

Parsers.jl configuration object used when parsing a `FixedDecimal{T,f}`.

`T` is the integer type backing the decimal and the field `f` carries the `f` type
parameter of the `FixedDecimal` being parsed, so the parsing routines can read it at
run time.
"""
struct FixedDecimalConf{T<:Integer} <: AbstractConf{T}
    f::Int
end
# Build the parser configuration for `FixedDecimal{T,f}`: the backing integer type goes
# into the type parameter and `f` into the field. `opts` and any extra arguments are unused.
function Parsers.conf(
    ::Type{FixedDecimal{T,f}}, opts::Parsers.Options, kw...
) where {T<:Integer,f}
    return FixedDecimalConf{T}(f)
end
Drvi marked this conversation as resolved.
Show resolved Hide resolved
# The parser itself produces the backing integer `T`, not the `FixedDecimal`.
function Parsers.returntype(::Type{FixedDecimal{T,f}}) where {T,f}
    return T
end
Drvi marked this conversation as resolved.
Show resolved Hide resolved
# Turn the parser's integer result into a result of the requested `FixedDecimal` type.
# An invalid result is propagated without a value; otherwise the parsed integer is
# reinterpreted as the `FixedDecimal`. The code and `tlen` are carried over unchanged.
function Parsers.result(FD::Type{FixedDecimal{T,f}}, res::Parsers.Result{T}) where {T,f}
    if Parsers.invalid(res.code)
        return Result{FD}(res.code, res.tlen)
    end
    return Result{FD}(res.code, res.tlen, reinterpret(FD, res.val))
end

# Pre-built Parsers options, one per rounding mode accepted by `_base_parse`.
const OPTIONS_ROUND_NEAREST = Parsers.Options(rounding=RoundNearest)
const OPTIONS_ROUND_TO_ZERO = Parsers.Options(rounding=RoundToZero)
# `rounding=nothing` stands for `RoundThrows`: `Parsers.scale` substitutes `RoundThrows`
# whenever `options.rounding` is `nothing`.
const OPTIONS_ROUND_THROWS = Parsers.Options(rounding=nothing)

# Multiply `n` by `10^decpos`, computing the power in `n`'s own type `T`
# (so fixed-width integer arithmetic wraps on overflow).
# TODO: a lookup table per type would be faster
@inline function _shift(n::T, decpos) where {T}
    pow10 = T(10)^decpos
    return pow10 * n
end

# Shared BigInt constants, passed as operands to the in-place GMP calls in `_divpow10!`
const _BIGINT1 = BigInt(1)
const _BIGINT2 = BigInt(2)
const _BIGINT10 = BigInt(10)
# Both buffers are (re)sized in the module's `__init__`
const _BIGINT_10s = BigInt[] # buffer for the power-of-ten divisors (10^pow) in _divpow10!, accessed via `Parsers.access_threaded`
const _BIGINT_Rs = BigInt[] # buffer for "remainders" in _divpow10!, accessed via `Parsers.access_threaded`

# Generate one `_maxintdigits` method per built-in fixed-width integer type, returning the
# number of decimal digits in that type's largest value.
for T in (Base.BitSigned_types..., Base.BitUnsigned_types...)
    # Count the decimal digits of `typemax(T)` once, at definition time
    maxdigits = ndigits(typemax(T))
    @eval _maxintdigits(::Type{$T}) = $maxdigits
end

# All `v`s are non-negative
# Convert the non-negative integer `v` to `T` without any range check:
# widen when `T` is larger, truncate when it is smaller, reinterpret the bits when the
# sizes match.
function _unsafe_convert_int(::Type{T}, v::V) where {T<:Integer,V<:Integer}
    if sizeof(T) > sizeof(V)
        return T(v)
    elseif sizeof(T) < sizeof(V)
        return unsafe_trunc(T, v)
    else
        return Base.bitcast(T, v)
    end
end
# BigInt sources are always truncated
function _unsafe_convert_int(::Type{T}, v::BigInt) where {T<:Integer}
    return unsafe_trunc(T, v)
end
# Already the right type: nothing to do
function _unsafe_convert_int(::Type{T}, v::T) where {T<:Integer}
    return v
end

# Report whether the magnitude `v` (with sign given by `neg`) lies outside the range of `T`.
function _check_overflows(::Type{T}, v::BigInt, neg::Bool) where {T<:Integer}
    if neg
        return -v < typemin(T)
    end
    return v > typemax(T)
end
function _check_overflows(::Type{T}, v::V, neg::Bool) where {T<:Integer,V<:Union{UInt64,UInt128}}
    # A `T` strictly wider than `V` can hold every value of `V`
    sizeof(T) <= sizeof(V) || return false
    if neg
        # Negative values may reach one past `typemax(T)` in magnitude
        limit = _unsafe_convert_int(V, typemax(T)) + one(V)
        return v > limit
    end
    return v > typemax(T)
end
# A value that is already a `T` trivially fits
function _check_overflows(::Type{T}, v::T, neg::Bool) where {T <: Integer}
    return false
end

# `x = div(x, 10^pow, mode)`; may set code |= INEXACT for RoundThrows
# x is non-negative, pow is >= 1
# `!` to signal we mutate bigints in-place
# Generic fallback: divide `x` by `10^pow` using the given rounding mode.
# `code` is passed through untouched.
function _divpow10!(x::T, code, pow, mode::RoundingMode) where {T}
    divisor = _shift(one(T), pow)
    quotient = div(x, divisor, mode)
    return quotient, code
end
Comment on lines +67 to +69
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not something for right now, but just sharing this because it's so super freaking cool:
#45

Todd came up with this approach to improve the performance of dividing by power of ten. Basically the idea is that you can skip the div, which is a very expensive operation, by instead multiplying by (2^64 / 10 / 2^64), and since you can precompute 2^64 / 10 as a constant, and then / 2^64 can be done by just a bit-shift, you can skip the divide. Really clever!

But I never managed to merge that PR, so............................. :)

I think we just leave that optimization aside here too, but it's fun and something cool to think about for the future! :)

# `RoundThrows` variant: divide `x` by `10^pow` and flag the result as INEXACT when the
# division leaves a remainder.
function _divpow10!(x::T, code, pow, ::RoundingMode{:Throw}) where {T}
    quotient, remainder = divrem(x, _shift(one(T), pow))
    if !(remainder == 0)
        code |= Parsers.INEXACT
    end
    return quotient, code
end
# BigInt, `RoundNearest`: divide `x` by `10^pow` in place, rounding ties to the even
# quotient. `x` itself is mutated and returned; `code` is returned unchanged.
# `r` and `y` are scratch BigInts taken from the module-level buffers.
# NOTE(review): presumably `Parsers.access_threaded` hands out the current thread's slot of
# the buffer, creating it with the given closure when missing — confirm against Parsers.jl.
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:Nearest})
# adapted from https://github.com/JuliaLang/julia/blob/112554e1a533cebad4cb0daa27df59636405c075/base/div.jl#L217
@inbounds r = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_Rs) # we must not yield here!
@inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
Base.GMP.MPZ.tdiv_qr!(x, r, x, y) # x, r = divrem(x, y)
Base.GMP.MPZ.tdiv_q!(y, _BIGINT2) # y = div(y, 2)
# With an even quotient a tie must not round up, so the threshold becomes half + 1
iseven(x) && Base.GMP.MPZ.add!(y, _BIGINT1) # y = y + iseven(x)
if r >= y
Base.GMP.MPZ.add!(x, _BIGINT1) # x = x + (r >= y)
end
return x, code
end
# BigInt, `RoundToZero`: divide `x` by `10^pow` in place, discarding the remainder.
# `x` itself is mutated and returned; `code` is returned unchanged.
# `y` is a scratch BigInt taken from the `_BIGINT_10s` buffer.
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:ToZero})
@inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
Base.GMP.MPZ.tdiv_q!(x, y) # x = div(x, y)
return x, code
end

# BigInt, `RoundThrows`: divide `x` by `10^pow` in place and set the INEXACT bit in `code`
# when the remainder is non-zero. `x` itself is mutated and returned.
# `y` is a scratch BigInt taken from the `_BIGINT_10s` buffer; it first holds the divisor
# and is then overwritten with the remainder.
function _divpow10!(x::BigInt, code, pow, ::RoundingMode{:Throw})
@inbounds y = Parsers.access_threaded(() -> (@static VERSION > v"1.5" ? BigInt(; nbits=256) : BigInt()), _BIGINT_10s) # we must not yield here!
Base.GMP.MPZ.set!(y, _BIGINT10) # y = 10
Base.GMP.MPZ.pow_ui!(y, pow) # y = y^pow
Base.GMP.MPZ.tdiv_qr!(x, y, x, y) # x, y = divrem(x, y)
y == 0 || (code |= Parsers.INEXACT)
return x, code
end

# Rescale the digits we accumulated so far into an integer representing the decimal
Drvi marked this conversation as resolved.
Show resolved Hide resolved
# Convert the accumulated `digits` (a non-negative magnitude with `ndigits` digits) and the
# decimal exponent `exp` into the integer of type `T` that backs the FixedDecimal.
# Returns `(value, code)`; problems are reported by setting bits in `code`:
#   * `Parsers.INEXACT`  - digits past the precision were dropped while rounding is disabled
#   * `Parsers.INVALID`  - a negative number was given for an unsigned `T`
#   * `Parsers.OVERFLOW` - the value does not fit in `T`
# `options.rounding === nothing` is treated as `RoundThrows`.
@inline function Parsers.scale(
    conf::FixedDecimalConf{T}, ::Parsers.FloatType, digits::V, exp, neg, code, ndigits, f::F, options::Parsers.Options
) where {T,V,F}
    rounding = something(options.rounding, RoundThrows)
    # Positive: how many trailing zeros we need to add to our integer
    # Negative: how many digits are past our precision (we need to handle them in rounding)
    decimal_shift = conf.f + exp
    # Number of digits we need to accumulate including any trailing zeros or digits past our precision
    backing_integer_digits = ndigits + decimal_shift
    may_overflow = backing_integer_digits == _maxintdigits(T)
    if iszero(ndigits)
        # all digits are zero
        i = zero(T)
    elseif backing_integer_digits < 0
        # All digits are past our precision, so the value becomes zero. Overflow is not
        # possible here (overflow is e.g. parsing "130" as FD{Int8,0}), but the result is
        # inexact if we're not okay with rounding (e.g. parsing "100.1" as FD{Int8,0}).
        i = zero(T)
        (rounding === RoundThrows) && (code |= Parsers.INEXACT)
    elseif neg && (T <: Unsigned)
        # Unsigned types can't represent negative numbers
        i = _unsafe_convert_int(T, digits)
        code |= Parsers.INVALID
    elseif backing_integer_digits > _maxintdigits(T)
        i = _unsafe_convert_int(T, digits)
        # The number of digits to accumulate is larger than the capacity of T, we overflow
        # We don't check for inexact here because we already have an error
        code |= Parsers.OVERFLOW
    else
        if decimal_shift > 0
            r = _unsafe_convert_int(T, digits)
            if may_overflow
                # Comparing the wrapped product against `r` is not a reliable overflow
                # test: the product can wrap around to a value that is still larger than
                # `r` (e.g. 5 * 10^9 as an Int32 wraps to 705032704). Use a checked
                # multiplication instead; it still returns the wrapped product.
                i, overflowed = Base.Checked.mul_with_overflow(r, _shift(one(T), decimal_shift))
                overflowed && (code |= Parsers.OVERFLOW)
            else
                # Fewer digits than T can hold: the product always fits
                i = _shift(r, decimal_shift)
            end
        elseif decimal_shift < 0
            # Branch on the rounding mode so each call dispatches on a concrete mode type
            if rounding === RoundNearest
                r, code = _divpow10!(digits, code, -decimal_shift, RoundNearest)
            elseif rounding === RoundToZero
                r, code = _divpow10!(digits, code, -decimal_shift, RoundToZero)
            else
                r, code = _divpow10!(digits, code, -decimal_shift, RoundThrows)
            end
            # Now that the digits were rescaled we can check for overflow
            # can happen e.g. if digits were unsigned ints and our type is signed
            may_overflow && _check_overflows(T, r, neg) && (code |= Parsers.OVERFLOW)
            i = _unsafe_convert_int(T, r)
        else
            may_overflow && _check_overflows(T, digits, neg) && (code |= Parsers.OVERFLOW)
            i = _unsafe_convert_int(T, digits)
        end
    end
    # `i` holds the magnitude; apply the sign last
    out = ifelse(neg, -i, i)
    return (out, code)
end

# If we only saw integer digits and not fractional or exponent digits, we just call scale with exp of 0
# To handle type conversions and overflow checks etc.
@inline function Parsers.noscale(conf::FixedDecimalConf{T}, digits::Integer, neg::Bool, code, ndigits, f::F, options::Parsers.Options) where {T,F}
    # Delegate to `scale` with an exponent of 0. The float-type argument is required by
    # the signature but is not used by the FixedDecimal parser.
    return Parsers.scale(conf, Parsers.FLOAT64, digits, 0, neg, code, ndigits, f, options)
end

# We return a value of T -- i.e. the _integer_ backing the FixedDecimal; the reinterpret needs to happen later
# Parse one FixedDecimal token starting at `pos`, where `b` is the byte at `pos`.
# Returns `(pos, code, poslen, x)`: the position after the consumed bytes, the updated
# result code, a `Parsers.PosLen` covering the consumed bytes, and `x`, the integer of
# type `T` backing the decimal.
# Throws an `ArgumentError` when `options.rounding` is not a supported rounding mode.
@inline function Parsers.typeparser(conf::FixedDecimalConf{T}, source, pos, len, b, code, pl, options) where {T<:Integer}
    if !(options.rounding in (nothing, RoundNearest, RoundToZero, RoundThrows))
        # Parentheses are required: "$options.rounding" would interpolate the whole
        # `options` object followed by the literal text ".rounding"
        throw(ArgumentError("Unhandled rounding mode $(options.rounding)"))
    end

    startpos = pos
    # begin parsing
    neg = b == UInt8('-')
    if neg || b == UInt8('+')
        # Consume the sign; a sign with nothing after it is invalid
        pos += 1
        Parsers.incr!(source)
        if Parsers.eof(source, pos, len)
            code |= Parsers.INVALID | Parsers.EOF
            x = zero(T)
            @goto done
        end
        b = Parsers.peekbyte(source, pos)
    else
        # Check if the input is empty
        if Parsers.eof(source, pos, len)
            code |= Parsers.INVALID | Parsers.EOF
            x = zero(T)
            @goto done
        end
    end

    # `b - UInt8('0')` wraps around for bytes below '0', so one unsigned comparison
    # covers exactly the ASCII digits
    if (b - UInt8('0')) <= 0x09 || b == options.decimal
        x, code, pos = Parsers.parsedigits(conf, source, pos, len, b, code, options, UInt64(0), neg, startpos, true, 0, nothing)
    else
        x = zero(T)
        code |= Parsers.INVALID
    end
    @label done
    return pos, code, Parsers.PosLen(pl.pos, pos - pl.pos), x
end

# Declare every `FixedDecimal` type as supported by Parsers.jl.
function Parsers.supportedtype(::Type{<:FixedDecimal})
    return true
end
Drvi marked this conversation as resolved.
Show resolved Hide resolved

# Shared implementation behind `Base.parse` and `Base.tryparse` for `FD{T, f}`.
#
# Validates `mode` (only `RoundThrows`, `RoundNearest` and `RoundToZero` are supported),
# rejects empty input, and runs the Parsers.jl machinery over the code units of `source`.
# Returns the result object produced by `Parsers.xparse2`; interpreting its `code` is left
# to the caller.
# Throws an `ArgumentError` for an unsupported rounding mode or for empty input.
function _base_parse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
    if !(mode in (RoundThrows, RoundNearest, RoundToZero))
        throw(ArgumentError("Unhandled rounding mode $mode"))
    end

    # Throw a proper exception object rather than a bare String
    isempty(source) && throw(ArgumentError("Empty input is not allowed"))
    bytes = codeunits(source)
    # Reuse the pre-built options object that matches the requested rounding mode
    options = mode === RoundNearest ? OPTIONS_ROUND_NEAREST :
        mode === RoundToZero ? OPTIONS_ROUND_TO_ZERO :
        OPTIONS_ROUND_THROWS
    res = Parsers.xparse2(FD{T, f}, bytes, 1, length(bytes), options)
    return res
end

# Parse `source` as a `FD{T, f}`, returning `nothing` instead of throwing when `source`
# is not a valid number. This includes empty input, trailing garbage, and overflowing or
# (under `RoundThrows`) inexact values. An unsupported rounding `mode` still raises an
# `ArgumentError`.
function Base.tryparse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
    if isempty(source)
        # `_base_parse` throws on empty input, but for `tryparse` an empty string is just
        # another unparsable input. Still reject unsupported rounding modes, exactly as
        # for non-empty input.
        if !(mode in (RoundThrows, RoundNearest, RoundToZero))
            throw(ArgumentError("Unhandled rounding mode $mode"))
        end
        return nothing
    end
    res = _base_parse(FD{T, f}, source, mode)
    # If we didn't reach eof, there was some garbage at the end of the string after something that looked like a number
    return (Parsers.eof(res.code) && Parsers.ok(res.code)) ? res.val : nothing
end

# Parse `source` as a `FD{T, f}`, throwing when that is not possible:
#   * `InexactError`  - the result code is flagged inexact
#   * `OverflowError` - the result code is flagged as overflow
#   * `ArgumentError` - the input is invalid or was not consumed up to its end
function Base.parse(::Type{FD{T, f}}, source::AbstractString, mode::RoundingMode=RoundNearest) where {T, f}
    outcome = _base_parse(FD{T, f}, source, mode)
    status = outcome.code
    if Parsers.inexact(status)
        throw(InexactError(:parse, FD{T, f}, source))
    elseif Parsers.overflow(status)
        throw(OverflowError("overflow parsing $(repr(source)) as $(FD{T, f})"))
    elseif !Parsers.eof(status) || Parsers.invalid(status)
        # Not reaching eof means there was some garbage after something that looked like a number
        throw(ArgumentError("cannot parse $(repr(source)) as $(FD{T, f})"))
    end
    return outcome.val
end
Loading