Skip to content

Various optimizations #68

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 38 additions & 36 deletions src/Loess.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ export loess, predict
include("kd.jl")


mutable struct LoessModel{T <: AbstractFloat}
xs::AbstractMatrix{T} # An n by m predictor matrix containing n observations from m predictors
ys::AbstractVector{T} # A length n response vector
struct LoessModel{T <: AbstractFloat}
xs::Matrix{T} # An n by m predictor matrix containing n observations from m predictors
ys::Vector{T} # A length n response vector
predictions_and_gradients::Dict{Vector{T}, Vector{T}} # kd-tree vertexes mapped to prediction and gradient at each vertex
kdtree::KDTree{T}
end
Expand Down Expand Up @@ -44,6 +44,10 @@ function loess(
degree::Integer = 2,
cell::AbstractFloat = 0.2
) where T<:AbstractFloat

Base.require_one_based_indexing(xs)
Base.require_one_based_indexing(ys)

if size(xs, 1) != size(ys, 1)
throw(DimensionMismatch("Predictor and response arrays must of the same length"))
end
Expand Down Expand Up @@ -80,8 +84,12 @@ function loess(
end

# distance to each point
for i in 1:n
ds[i] = euclidean(vec(vert), vec(xs[i,:]))
@inbounds for i in 1:n
s = zero(T)
for j in 1:m
s += (xs[i, j] - vert[j])^2
end
ds[i] = sqrt(s)
end

# find the q closest points
Expand Down Expand Up @@ -128,7 +136,7 @@ function loess(
]
end

LoessModel{T}(xs, ys, predictions_and_gradients, kdtree)
LoessModel(xs, ys, predictions_and_gradients, kdtree)
end

loess(xs::AbstractVector{T}, ys::AbstractVector{T}; kwargs...) where {T<:AbstractFloat} =
Expand All @@ -153,50 +161,44 @@ end
# Returns:
# A length n' vector of predicted response values.
#
function predict(model::LoessModel, z::Real)
predict(model, [z])
end

function predict(model::LoessModel, zs::AbstractVector)

Base.require_one_based_indexing(zs)
function predict(model::LoessModel{T}, z::Number) where T
adjacent_verts = traverse(model.kdtree, (T(z),))

m = size(model.xs, 2)
@assert(length(adjacent_verts) == 2)
v₁, v₂ = adjacent_verts[1][1], adjacent_verts[2][1]

# in the univariate case, interpret a non-singleton zs as vector of
# ponits, not one point
if m == 1 && length(zs) > 1
return predict(model, reshape(zs, (length(zs), 1)))
if z == v₁ || z == v₂
return first(model.predictions_and_gradients[[z]])
end

if length(zs) != m
error("$(m)-dimensional model applied to length $(length(zs)) vector")
end
y₁, dy₁ = model.predictions_and_gradients[[v₁]]
y₂, dy₂ = model.predictions_and_gradients[[v₂]]

adjacent_verts = traverse(model.kdtree, zs)
b_int = cubic_interpolation(v₁, y₁, dy₁, v₂, y₂, dy₂)

if m == 1
@assert(length(adjacent_verts) == 2)
z = zs[1]
v₁, v₂ = adjacent_verts[1][1], adjacent_verts[2][1]
return evalpoly(z, b_int)
end

if z == v₁ || z == v₂
return first(model.predictions_and_gradients[[z]])
end
function predict(model::LoessModel, zs::AbstractVector)
if size(model.xs, 2) > 1
throw(ArgumentError("multivariate blending not yet implemented"))
end

y₁, dy₁ = model.predictions_and_gradients[[v₁]]
y₂, dy₂ = model.predictions_and_gradients[[v₂]]
return [predict(model, z) for z in zs]
end

b_int = cubic_interpolation(v₁, y₁, dy₁, v₂, y₂, dy₂)
function predict(model::LoessModel, zs::AbstractMatrix)
if size(model.xs, 2) != size(zs, 2)
throw(DimensionMismatch("number of columns in input matrix must match the number of columns in the model matrix"))
end

return evalpoly(z, b_int)
if size(zs, 2) == 1
return predict(model, vec(zs))
else
error("Multivariate blending not yet implemented")
return [predict(model, row) for row in eachrow(zs)]
end
end

predict(model::LoessModel, zs::AbstractMatrix) = map(Base.Fix1(predict, model), eachrow(zs))

"""
tricubic(u)

Expand Down
51 changes: 25 additions & 26 deletions src/kd.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
# Simple static kd-trees.

abstract type KDNode end

struct KDLeafNode <: KDNode
end

struct KDInternalNode{T <: AbstractFloat} <: KDNode
struct KDNode{T <: AbstractFloat}
j::Int # dimension on which the data is split
med::T # median value where the split occours
leftnode::KDNode
rightnode::KDNode
leftnode::Union{Nothing, KDNode{T}}
rightnode::Union{Nothing, KDNode{T}}
end


struct KDTree{T <: AbstractFloat}
xs::AbstractMatrix{T} # A matrix of n, m-dimensional observations
xs::Matrix{T} # A matrix of n, m-dimensional observations
perm::Vector{Int} # permutation of data to avoid modifying xs
root::KDNode # root node
root::KDNode{T} # root node
verts::Set{Vector{T}}
bounds::Matrix{T} # Top-level bounding box
end
Expand Down Expand Up @@ -114,7 +109,7 @@ Modifies:
`perm`, `verts`

Returns:
Either a `KDLeafNode` or a `KDInternalNode`
Either a `nothing` or a `KDNode`
"""
function build_kdtree(xs::AbstractMatrix{T},
perm::AbstractVector,
Expand All @@ -130,7 +125,7 @@ function build_kdtree(xs::AbstractMatrix{T},

if length(perm) <= leaf_size_cutoff || diameter(bounds) <= leaf_diameter_cutoff
@debug "Creating leaf node" length(perm) leaf_size_cutoff diameter(bounds) leaf_diameter_cutoff
return KDLeafNode()
return nothing
end

# split on the dimension with the largest spread
Expand Down Expand Up @@ -226,7 +221,7 @@ function build_kdtree(xs::AbstractMatrix{T},
push!(verts, T[vert...])
end

KDInternalNode{T}(j, med, leftnode, rightnode)
KDNode(j, med, leftnode, rightnode)
end


Expand All @@ -246,14 +241,15 @@ end
Traverse the tree `kdtree` to the bottom and return the verticies of
the bounding hypercube of the leaf node containing the point `x`.
"""
function traverse(kdtree::KDTree, x::AbstractVector)
function traverse(kdtree::KDTree{T}, x::NTuple{N,T}) where {N,T}

m = size(kdtree.bounds, 2)

if length(x) != m
if N != m
throw(DimensionMismatch("$(m)-dimensional kd-tree searched with a length $(length(x)) vector."))
end

for j in 1:m
for j in 1:N
if x[j] < kdtree.bounds[1, j] || x[j] > kdtree.bounds[2, j]
error(
"""
Expand All @@ -266,15 +262,18 @@ function traverse(kdtree::KDTree, x::AbstractVector)

bounds = copy(kdtree.bounds)
node = kdtree.root
while !isa(node, KDLeafNode)
if x[node.j] <= node.med
bounds[2, node.j] = node.med
node = node.leftnode
else
bounds[1, node.j] = node.med
node = node.rightnode
end
end

bounds_verts(bounds)
return _traverse!(bounds, node, x)
end

_traverse!(bounds, node::Nothing, x) = bounds
function _traverse!(bounds, node::KDNode, x)
if x[node.j] <= node.med
bounds[2, node.j] = node.med
return _traverse!(bounds, node.leftnode, x)
else
bounds[1, node.j] = node.med
return _traverse!(bounds, node.rightnode, x)
end
end