Skip to content

Commit

Permalink
implement float <-> 128-bit int conversions in Julia
Browse files Browse the repository at this point in the history
  • Loading branch information
simonbyrne committed Nov 25, 2014
1 parent b70617f commit 239bc2c
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 6 deletions.
99 changes: 95 additions & 4 deletions base/float.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,81 @@ for t in (Bool,Char,Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64)
end

# Generate integer -> float `convert` methods and the matching `promote_rule`
# for every float type `t1`: signed sources go through the `sitofp` intrinsic,
# unsigned sources (plus Bool and Char) through `uitofp`.
# NOTE(review): this text is a rendered diff without +/- markers; of each pair
# of adjacent `for` headers below, the first appears to be the removed
# pre-commit line and the second its replacement, which drops the 128-bit type.
for t1 in (Float32,Float64)
for st in (Int8,Int16,Int32,Int64,Int128)
for st in (Int8,Int16,Int32,Int64)
@eval begin
convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x)))
# mixing a float with this integer type promotes to the float type
promote_rule(::Type{$t1}, ::Type{$st} ) = $t1
end
end
for ut in (Bool,Char,UInt8,UInt16,UInt32,UInt64,UInt128)
for ut in (Bool,Char,UInt8,UInt16,UInt32,UInt64)
@eval begin
convert(::Type{$t1},x::($ut)) = box($t1,uitofp($t1,unbox($ut,x)))
# mixing a float with this unsigned type promotes to the float type
promote_rule(::Type{$t1}, ::Type{$ut} ) = $t1
end
end
end

# Mixing a 128-bit integer with a float promotes to that float type.
for Tf in (Float64, Float32), Ti in (UInt128, Int128)
    @eval promote_rule(::Type{$Tf}, ::Type{$Ti}) = $Tf
end

# Convert a UInt128 to the nearest Float64 (round to nearest, ties to even)
# by assembling the IEEE-754 bit pattern directly.
function convert(::Type{Float64}, x::UInt128)
    x == 0 && return 0.0
    n = 128 - leading_zeros(x) # number of significant bits, ndigits0z(x,2)
    if n <= 53
        # Value fits in the significand: left-align it and drop the implicit leading bit.
        y = ((x % UInt64) << (53 - n)) & 0x000f_ffff_ffff_ffff
    else
        y = ((x >> (n - 54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
        y = (y + 1) >> 1 # round, ties up (a carry into bit 52 bumps the exponent below)
        y &= ~UInt64(trailing_zeros(x) == (n - 54)) # exact tie: clear last bit (round to even)
    end
    # Widen to UInt64 *before* shifting: `n` is an Int, so where Int is 32 bits
    # `(n+1022) << 52` would shift every bit out and the exponent would be lost.
    d = ((n + 1022) % UInt64) << 52
    return reinterpret(Float64, d + y)
end

# Convert an Int128 to the nearest Float64 (round to nearest, ties to even):
# peel off the sign, convert the magnitude bit-by-hand, then OR the sign back in.
function convert(::Type{Float64}, x::Int128)
    x == 0 && return 0.0
    s = ((x >>> 64) % UInt64) & 0x8000_0000_0000_0000 # sign bit
    ax = abs(x) % UInt128 # magnitude, kept in its own variable so `x` keeps one type
    n = 128 - leading_zeros(ax) # number of significant bits, ndigits0z(ax,2)
    if n <= 53
        # Value fits in the significand: left-align it and drop the implicit leading bit.
        y = ((ax % UInt64) << (53 - n)) & 0x000f_ffff_ffff_ffff
    else
        y = ((ax >> (n - 54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
        y = (y + 1) >> 1 # round, ties up (a carry into bit 52 bumps the exponent below)
        y &= ~UInt64(trailing_zeros(ax) == (n - 54)) # exact tie: clear last bit (round to even)
    end
    # Widen to UInt64 *before* shifting: `n` is an Int, so where Int is 32 bits
    # `(n+1022) << 52` would shift every bit out and the exponent would be lost.
    d = ((n + 1022) % UInt64) << 52
    return reinterpret(Float64, (s | d) + y)
end

# UInt128 -> Float32, rounding to nearest with ties to even; the result is
# built from an exponent field and a 23-bit fraction field.
function convert(::Type{Float32}, x::UInt128)
    if x == 0
        return 0f0
    end
    nbits = 128 - leading_zeros(x) # count of significant bits
    expfield = ((nbits + 126) << 23) % UInt32
    if nbits > 24
        drop = nbits - 25 # low bits discarded, leaving one guard bit
        frac = ((x >> drop) % UInt32) & 0x00ff_ffff
        frac = (frac + one(UInt32)) >> 1 # round half up; a carry spills into the exponent
        frac &= ~UInt32(trailing_zeros(x) == drop) # exact tie -> force even
    else
        # small enough to be exact: left-align and strip the implicit leading bit
        frac = ((x % UInt32) << (24 - nbits)) & 0x007f_ffff
    end
    return reinterpret(Float32, expfield + frac)
end

# Int128 -> Float32, rounding to nearest with ties to even; the sign bit is
# extracted first and OR-ed onto the exponent field of the magnitude.
function convert(::Type{Float32}, x::Int128)
    if x == 0
        return 0f0
    end
    signbit32 = ((x >>> 96) % UInt32) & 0x8000_0000
    mag = abs(x) % UInt128
    nbits = 128 - leading_zeros(mag) # count of significant bits
    expfield = ((nbits + 126) << 23) % UInt32
    if nbits > 24
        drop = nbits - 25 # low bits discarded, leaving one guard bit
        frac = ((mag >> drop) % UInt32) & 0x00ff_ffff
        frac = (frac + one(UInt32)) >> 1 # round half up; a carry spills into the exponent
        frac &= ~UInt32(trailing_zeros(mag) == drop) # exact tie -> force even
    else
        # small enough to be exact: left-align and strip the implicit leading bit
        frac = ((mag % UInt32) << (24 - nbits)) & 0x007f_ffff
    end
    return reinterpret(Float32, (signbit32 | expfield) + frac)
end

#convert(::Type{Float16}, x::Float32) = box(Float16,fptrunc(Float16,x))
# Float64 -> Float16 is done in two steps, via Float32.
function convert(::Type{Float16}, x::Float64)
    return convert(Float16, convert(Float32, x))
end

# Float64 -> Float32 uses the `fptrunc` intrinsic.
function convert(::Type{Float32}, x::Float64)
    return box(Float32, fptrunc(Float32, x))
end
Expand All @@ -44,19 +106,48 @@ float32(x) = convert(Float32, x)
# Convenience wrappers that forward to `convert` with a fixed target type.
function float64(x)
    return convert(Float64, x)
end

function float(x)
    return convert(FloatingPoint, x)
end

# Generate `unsafe_trunc` (float -> fixed-width integer) methods per integer
# type: signed targets use the `fptosi` intrinsic, unsigned targets `fptoui`.
# NOTE(review): this text is a rendered diff without +/- markers; of each pair
# of adjacent `for` headers below, the first appears to be the removed
# pre-commit line and the second its replacement, which drops the 128-bit type.
for Ti in (Int8, Int16, Int32, Int64, Int128)
for Ti in (Int8, Int16, Int32, Int64)
@eval begin
unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptosi($Ti,unbox(Float32,x)))
unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptosi($Ti,unbox(Float64,x)))
end
end
for Ti in (UInt8, UInt16, UInt32, UInt64, UInt128)
for Ti in (UInt8, UInt16, UInt32, UInt64)
@eval begin
unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptoui($Ti,unbox(Float32,x)))
unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptoui($Ti,unbox(Float64,x)))
end
end

# Truncate a Float64 to UInt128 by decoding its bit pattern directly.
# The sign bit is masked away and no range or NaN/Inf check is made on `x`.
function unsafe_trunc(::Type{UInt128}, x::Float64)
    xu = reinterpret(UInt64, x)
    # Exponent field minus 1075 (1023 bias + 52 fraction bits): the power of two
    # by which the integer significand has to be scaled.
    k = (Int(xu >> 52) & 0x07ff) - 1075
    xu = (xu & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000 # fraction plus implicit leading 1
    if k <= 0
        return UInt128(xu >> -k) # scale down: fractional bits are shifted out
    else
        return UInt128(xu) << k # scale up in 128-bit arithmetic
    end
end
# Signed variant: truncate through the UInt128 method, reinterpret the result
# as Int128, then give it the sign of `x`.
function unsafe_trunc(::Type{Int128}, x::Float64)
    magnitude = unsafe_trunc(UInt128, x) % Int128
    return copysign(magnitude, x)
end

# Truncate a Float32 to UInt128 by decoding its bit pattern directly.
# The sign bit is masked away and no range or NaN/Inf check is made on `x`.
function unsafe_trunc(::Type{UInt128}, x::Float32)
    xu = reinterpret(UInt32, x)
    # Exponent field minus 150 (127 bias + 23 fraction bits): the power of two
    # by which the integer significand has to be scaled.
    k = (Int(xu >> 23) & 0x00ff) - 150
    xu = (xu & 0x007f_ffff) | 0x0080_0000 # fraction plus implicit leading 1
    if k <= 0
        return UInt128(xu >> -k) # scale down: fractional bits are shifted out
    else
        return UInt128(xu) << k # scale up in 128-bit arithmetic
    end
end
# Signed variant: truncate through the UInt128 method, reinterpret the result
# as Int128, then give it the sign of `x`.
function unsafe_trunc(::Type{Int128}, x::Float32)
    magnitude = unsafe_trunc(UInt128, x) % Int128
    return copysign(magnitude, x)
end


# matches convert methods
# also determines floor, ceil, round
# Asking for the abstract `Signed` type truncates to the native `Int`.
function trunc(::Type{Signed}, x::Float32)
    return trunc(Int, x)
end
Expand Down
13 changes: 11 additions & 2 deletions base/int.jl
Original file line number Diff line number Diff line change
Expand Up @@ -179,20 +179,29 @@ rem{T<:Integer}(x::T, ::Type{T}) = x
# Integer remainder into Bool: true exactly when the lowest bit of `x` is set.
function rem(x::Integer, ::Type{Bool})
    lowbit = x & 1
    return lowbit != 0
end
# `mod` of an integer into an integer type simply forwards to `rem`.
function mod{T<:Integer}(x::Integer, ::Type{T})
    return rem(x, T)
end

# Generate float -> fixed-width integer `convert` methods per integer type:
# signed targets use the `checked_fptosi` intrinsic, unsigned targets
# `checked_fptoui`.
# NOTE(review): this text is a rendered diff without +/- markers; of each pair
# of adjacent `for` headers below, the first appears to be the removed
# pre-commit line and the second its replacement, which drops the 128-bit type.
for to in (Int8, Int16, Int32, Int64, Int128)
for to in (Int8, Int16, Int32, Int64)
@eval begin
convert(::Type{$to}, x::Float32) = box($to,checked_fptosi($to,unbox(Float32,x)))
convert(::Type{$to}, x::Float64) = box($to,checked_fptosi($to,unbox(Float64,x)))
end
end

for to in (UInt8, UInt16, UInt32, UInt64, UInt128)
for to in (UInt8, UInt16, UInt32, UInt64)
@eval begin
convert(::Type{$to}, x::Float32) = box($to,checked_fptoui($to,unbox(Float32,x)))
convert(::Type{$to}, x::Float64) = box($to,checked_fptoui($to,unbox(Float64,x)))
end
end

# Float -> 128-bit integer conversion: reject any value that is not an exact
# integer, then hand the rest to `trunc`.
for Ti in (Int128, UInt128), Tf in (Float32, Float64)
    @eval function convert(::Type{$Ti}, x::$Tf)
        if !isinteger(x)
            throw(InexactError())
        end
        return trunc($Ti, x)
    end
end

# Converting an unsigned integer to the abstract `Signed` type picks the
# signed type of the same width.
for (U, S) in ((UInt8, Int8), (UInt16, Int16), (UInt32, Int32))
    @eval convert(::Type{Signed}, x::$U) = convert($S, x)
end
Expand Down

0 comments on commit 239bc2c

Please sign in to comment.