From 89e1d2e6c774e21c910df2587538f25bea317b0a Mon Sep 17 00:00:00 2001
From: Chris Elrod
Date: Fri, 18 Sep 2020 10:25:27 -0400
Subject: [PATCH] Add vcat and allocate enough memory.

- Define `+` and `-` for pairs of `AbstractStrideArray`s via broadcasting,
  and add a `Base.unaliascopy` method for `AbstractStrideArray`.
- Allocate `pick_vector_width(T) - 1` extra elements for the parent `Vector`
  whose pointer is passed through `align`.
- Add `Base.vcat` for two `AbstractStrideMatrix` arguments; mismatched column
  counts are rejected with a `DimensionMismatch`.
---
Review notes (text in this section is not part of the commit message):
- The line structure of this patch was restored by hand; indentation inside
  the hunks follows the usual 4-space convention and should be checked
  against the target files before applying.
- The hunks for src/initialization.jl and src/miscellaneous.jl were edited
  (qualified `VectorizationBase.pick_vector_width`; column check and
  docstring for `vcat`), so the post-image blob ids on their `index` lines
  are those of the original commit and no longer match.
- NOTE(review): `@assert length(out) ≥ length(parent)` in
  src/initialization.jl looks inverted (one would expect the parent to be
  at least as long as the array built on it) -- confirm with the author.
- NOTE(review): `Base.unaliascopy(A::AbstractStrideArray) = A` returns `A`
  itself rather than a copy -- confirm that this is intended.

 src/broadcast.jl      |  5 +++++
 src/initialization.jl |  7 ++++---
 src/miscellaneous.jl  | 27 +++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/broadcast.jl b/src/broadcast.jl
index 49da9db..2ea7e85 100644
--- a/src/broadcast.jl
+++ b/src/broadcast.jl
@@ -346,3 +346,8 @@ LoopVectorization.vmaterialize!(dest, bc::Base.Broadcast.Broadcasted{<:AbstractS
 LoopVectorization.vmaterialize(bc::StrideArrayProduct) = Base.Broadcast.materialize(bc)
 LoopVectorization.vmaterialize!(dest, bc::StrideArrayProduct) = Base.Broadcast.materialize!(dest, bc)
 
+Base.:(+)(A::AbstractStrideArray, B::AbstractStrideArray) = A .+ B
+Base.:(-)(A::AbstractStrideArray, B::AbstractStrideArray) = A .- B
+
+Base.unaliascopy(A::AbstractStrideArray) = A
+
diff --git a/src/initialization.jl b/src/initialization.jl
index 1553c74..8b5bd0c 100644
--- a/src/initialization.jl
+++ b/src/initialization.jl
@@ -97,7 +97,7 @@ end
     q, st, xt, xv, L = partially_sized(sv, pad, T)
     SN = length(st.args); XN = length(xt.args)
     # W = VectorizationBase.pick_vector_width(T)
-    push!(q.args, :(parent = Vector{$T}(undef, $L)))
+    push!(q.args, :(parent = Vector{$T}(undef, $L + $(VectorizationBase.pick_vector_width(T) - 1))))
     push!(q.args, :(StrideArray{$S,$T,$N,$(ctuple(xv)),$SN,$XN,false}(align(pointer(parent)), $st, $xt, parent)))
     q
 end
@@ -128,7 +128,7 @@ end
     any(s -> s == -1, sv) || return Expr(:block, Expr(:meta,:inline), :(StrideArray{$S,$T}(undef)))
     q, st, xt, xv, L = partially_sized(sv, pad, T)
     SN = length(st.args); XN = length(xt.args)
-    push!(q.args, :(parent = Vector{$T}(undef, $L)))
+    push!(q.args, :(parent = Vector{$T}(undef, $L + $(VectorizationBase.pick_vector_width(T) - 1))))
     push!(q.args, :(StrideArray{$S,$T,$N,$(ctuple(xv)),$SN,$XN,false}(align(pointer(parent)), $st, $xt, parent)))
     q
 end
@@ -145,7 +145,8 @@ end
         for s in @view(sv[1:end-1])
             push!(X.args, (x *= s))
         end
-        Expr(:block, Expr(:meta,:inline), :(StrideArray{$S,$T,$N,$X,0,0,false}(align(pointer(parent)), (), (), parent)))
+        # Expr(:block, Expr(:meta,:inline), :(StrideArray{$S,$T,$N,$X,0,0,false}(align(pointer(parent)), (), (), parent)))
+        Expr(:block, Expr(:meta,:inline), :(out = StrideArray{$S,$T,$N,$X,0,0,false}(align(pointer(parent)), (), (), parent)), :(@assert length(out) ≥ length(parent)), :out)
     end
     q, st, xt, xv, L = partially_sized(sv, pad, T)
     SN = length(st.args); XN = length(xt.args)
diff --git a/src/miscellaneous.jl b/src/miscellaneous.jl
index f38b940..a2d437d 100644
--- a/src/miscellaneous.jl
+++ b/src/miscellaneous.jl
@@ -7,3 +7,30 @@ function maximum(::typeof(abs), A::AbstractStrideArray{S,T}) where {S,T}
     s
 end
 
+"""
+    vcat(A::AbstractStrideMatrix, B::AbstractStrideMatrix)
+
+Stack `A` on top of `B` in an array obtained from `allocarray`, with element type
+`promote_type(eltype(A), eltype(B))`.
+
+Throws a `DimensionMismatch` if `A` and `B` differ in their number of columns.
+"""
+function Base.vcat(A::AbstractStrideMatrix, B::AbstractStrideMatrix)
+    # `out` gets `A`'s column count, so a wider `B` would be indexed past the columns
+    # of `out` and a narrower one would leave part of `out` unwritten.
+    size(A, 2) == size(B, 2) || throw(DimensionMismatch(
+        "number of columns must match (got $(size(A, 2)) and $(size(B, 2)))"))
+    sA1 = maybestaticsize(A,Val{1}())
+    out = allocarray(promote_type(eltype(A), eltype(B)), (PaddedMatrices.vadd(sA1, maybestaticsize(B,Val{1}())) , maybestaticsize(A,Val{2}())))
+    # GC.@preserve out A B begin
+    # outP = PtrArray(outP); AP = PtrArray(A); BP = PtrArray(B);
+    @avx for j ∈ axes(A,2), i ∈ axes(A,1)
+        out[i,j] = A[i,j]
+    end
+    @avx for j ∈ axes(B,2), i ∈ axes(B,1)
+        out[i + sA1,j] = B[i,j]
+    end
+    out
+end
+
+