implement float <-> 128-bit int conversions in Julia

JuliaLang · Nov 25, 2014 · 239bc2c · 239bc2c
1 parent b70617f
commit 239bc2c
Show file tree

Hide file tree

Showing 2 changed files with 106 additions and 6 deletions.
diff --git a/base/float.jl b/base/float.jl
@@ -5,19 +5,81 @@ for t in (Bool,Char,Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64)
 end
 
 for t1 in (Float32,Float64)
-    for st in (Int8,Int16,Int32,Int64,Int128)
+    for st in (Int8,Int16,Int32,Int64) # Int128 is covered by the hand-written convert/promote_rule methods below
         @eval begin
             convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x)))
             promote_rule(::Type{$t1}, ::Type{$st}  ) = $t1
         end
     end
-    for ut in (Bool,Char,UInt8,UInt16,UInt32,UInt64,UInt128)
+    for ut in (Bool,Char,UInt8,UInt16,UInt32,UInt64) # UInt128 is covered by the hand-written methods below
         @eval begin
             convert(::Type{$t1},x::($ut)) = box($t1,uitofp($t1,unbox($ut,x)))
             promote_rule(::Type{$t1}, ::Type{$ut}  ) = $t1
         end
     end
 end
+
+promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64 # 128-bit ints promote to the float type, like the narrower ints in the loop above
+promote_rule(::Type{Float64}, ::Type{Int128}) = Float64
+promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32 # same rule for Float32
+promote_rule(::Type{Float32}, ::Type{Int128}) = Float32
+
+function convert(::Type{Float64}, x::UInt128) # UInt128 -> Float64 built from raw bits; rounds to nearest, ties to even
+    x == 0 && return 0.0 # zero is returned before any bit counting
+    n = 128-leading_zeros(x) # number of significant bits, i.e. ndigits0z(x,2)
+    if n <= 53
+        y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff # exact case: left-align to 53 bits, keep the low 52
+    else
+        y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
+        y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even
+    end
+    reinterpret(Float64, (((n+1022) % UInt64) << 52) + y) # widen BEFORE shifting: (n+1022)<<52 loses every bit when Int is 32 bits
+end
+
+function convert(::Type{Float64}, x::Int128) # Int128 -> Float64 built from raw bits; rounds to nearest, ties to even
+    x == 0 && return 0.0 # zero is returned before any bit counting
+    s = ((x >>> 64) % UInt64) & 0x8000_0000_0000_0000 # sign bit
+    x = abs(x) % UInt128 # continue with the magnitude as an unsigned value
+    n = 128-leading_zeros(x) # number of significant bits, i.e. ndigits0z(x,2)
+    if n <= 53
+        y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff # exact case: left-align to 53 bits, keep the low 52
+    else
+        y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
+        y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even
+    end
+    reinterpret(Float64, (s | (((n+1022) % UInt64) << 52)) + y) # widen BEFORE shifting: (n+1022)<<52 loses every bit when Int is 32 bits
+end
+
+function convert(::Type{Float32}, x::UInt128) # UInt128 -> Float32 by assembling the bit pattern directly
+    x == 0 && return 0f0 # zero is returned before any bit counting
+    n = 128-leading_zeros(x) # number of significant bits, i.e. ndigits0z(x,2)
+    if n <= 24
+        y = ((x % UInt32) << (24-n)) & 0x007f_ffff # left-align to 24 bits, keep the low 23
+    else
+        y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
+        y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even
+    end
+    reinterpret(Float32,((n+126)<<23) % UInt32 + y) # n+126 is placed at bit 23; a carry out of y adds one to it
+end
+
+function convert(::Type{Float32}, x::Int128) # Int128 -> Float32 by assembling the bit pattern directly
+    x == 0 && return 0f0 # zero is returned before any bit counting
+    s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit
+    x = abs(x) % UInt128 # continue with the magnitude as an unsigned value
+    n = 128-leading_zeros(x) # number of significant bits, i.e. ndigits0z(x,2)
+    if n <= 24
+        y = ((x % UInt32) << (24-n)) & 0x007f_ffff # left-align to 24 bits, keep the low 23
+    else
+        y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
+        y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even
+    end
+    reinterpret(Float32, (s | ((n+126)<<23) % UInt32) + y) # sign | (n+126 at bit 23), plus y; a carry out of y adds one to the exponent
+end
+
 #convert(::Type{Float16}, x::Float32) = box(Float16,fptrunc(Float16,x))
 convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32,x))
 convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,x))
@@ -44,19 +106,48 @@ float32(x) = convert(Float32, x)
 float64(x) = convert(Float64, x)
 float(x)   = convert(FloatingPoint, x)
 
-for Ti in (Int8, Int16, Int32, Int64, Int128)
+for Ti in (Int8, Int16, Int32, Int64) # Int128 has hand-written unsafe_trunc methods below
     @eval begin
         unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptosi($Ti,unbox(Float32,x)))
         unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptosi($Ti,unbox(Float64,x)))
     end
 end
-for Ti in (UInt8, UInt16, UInt32, UInt64, UInt128)
+for Ti in (UInt8, UInt16, UInt32, UInt64) # UInt128 has hand-written unsafe_trunc methods below
     @eval begin
         unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptoui($Ti,unbox(Float32,x)))
         unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptoui($Ti,unbox(Float64,x)))
     end
 end
 
+function unsafe_trunc(::Type{UInt128}, x::Float64) # truncate using the raw bits; the sign bit is never inspected here
+    xu = reinterpret(UInt64,x) # raw bit pattern of x
+    k = int(xu >> 52) & 0x07ff - 1075 # 11-bit exponent field minus 1075 (1023 bias + 52 fraction bits); & binds tighter than -
+    xu = (xu & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000 # low 52 bits with bit 52 (the implicit leading 1) set
+    if k <= 0
+        UInt128(xu >> -k) # non-positive k: shift right, discarding the fractional bits
+    else
+        UInt128(xu) << k # positive k: widen to 128 bits first, then shift left
+    end
+end
+function unsafe_trunc(::Type{Int128}, x::Float64) # signed variant built on the UInt128 method
+    copysign(unsafe_trunc(UInt128,x) % Int128, x) # take the UInt128 result as an Int128, then apply the sign of x
+end
+
+function unsafe_trunc(::Type{UInt128}, x::Float32) # truncate using the raw bits; the sign bit is never inspected here
+    xu = reinterpret(UInt32,x) # raw bit pattern of x
+    k = int(xu >> 23) & 0x00ff - 150 # 8-bit exponent field minus 150 (127 bias + 23 fraction bits); & binds tighter than -
+    xu = (xu & 0x007f_ffff) | 0x0080_0000 # low 23 bits with bit 23 (the implicit leading 1) set
+    if k <= 0
+        UInt128(xu >> -k) # non-positive k: shift right, discarding the fractional bits
+    else
+        UInt128(xu) << k # positive k: widen to 128 bits first, then shift left
+    end
+end
+function unsafe_trunc(::Type{Int128}, x::Float32) # signed variant built on the UInt128 method
+    copysign(unsafe_trunc(UInt128,x) % Int128, x) # take the UInt128 result as an Int128, then apply the sign of x
+end
+
+
 # matches convert methods
 # also determines floor, ceil, round
 trunc(::Type{Signed}, x::Float32) = trunc(Int,x)

diff --git a/base/int.jl b/base/int.jl
@@ -179,20 +179,29 @@ rem{T<:Integer}(x::T, ::Type{T}) = x
 rem(x::Integer, ::Type{Bool}) = ((x&1)!=0)
 mod{T<:Integer}(x::Integer, ::Type{T}) = rem(x, T)
 
-for to in (Int8, Int16, Int32, Int64, Int128)
+for to in (Int8, Int16, Int32, Int64) # Int128 targets are generated by the isinteger/trunc loop below
     @eval begin
         convert(::Type{$to}, x::Float32) = box($to,checked_fptosi($to,unbox(Float32,x)))
         convert(::Type{$to}, x::Float64) = box($to,checked_fptosi($to,unbox(Float64,x)))
     end
 end
 
-for to in (UInt8, UInt16, UInt32, UInt64, UInt128)
+for to in (UInt8, UInt16, UInt32, UInt64) # UInt128 targets are generated by the isinteger/trunc loop below
     @eval begin
         convert(::Type{$to}, x::Float32) = box($to,checked_fptoui($to,unbox(Float32,x)))
         convert(::Type{$to}, x::Float64) = box($to,checked_fptoui($to,unbox(Float64,x)))
     end
 end
 
+for Ti in (Int128,UInt128) # generate float -> 128-bit integer convert methods
+    for Tf in (Float32,Float64)
+        @eval function convert(::Type{$Ti},x::$Tf) # one method per (Ti, Tf) pair
+            isinteger(x) || throw(InexactError()) # reject any x for which isinteger is false
+            trunc($Ti,x) # delegates to trunc, which is defined outside this hunk
+        end
+    end
+end
+
 convert(::Type{Signed}, x::UInt8  ) = convert(Int8,x)
 convert(::Type{Signed}, x::UInt16 ) = convert(Int16,x)
 convert(::Type{Signed}, x::UInt32 ) = convert(Int32,x)