Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For a 0.2.2 release #12

Merged
merged 8 commits into from
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
name = "LearnTestAPI"
uuid = "3111ed91-c4f2-40e7-bb19-7f6c618409b8"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.2.1"
version = "0.2.2"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
CategoricalDistributions = "af321ab8-2d2e-40a6-b165-3d674595d28e"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
IsURL = "ceb4388c-583f-448d-bb30-00b11e8c5682"
LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb"
LearnDataFrontEnds = "5cca22a3-9356-470e-ba1b-8268d0135a4b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLCore = "c2834f40-e789-41da-a90e-33b280584a8c"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Expand All @@ -22,6 +25,8 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"

[compat]
CategoricalArrays = "0.10.8"
CategoricalDistributions = "0.1.15"
Distributions = "0.25"
InteractiveUtils = "<0.0.1, 1"
IsURL = "0.2.0"
Expand All @@ -46,7 +51,16 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[targets]
test = ["DataFrames", "Distributions", "Random", "LinearAlgebra", "Statistics", "Tables"]
test = [
"DataFrames",
"Distributions",
"Random",
"LinearAlgebra",
"Statistics",
"StatsModels",
"Tables",
]
4 changes: 3 additions & 1 deletion src/LearnTestAPI.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
LearnTestAPI

Module for testing implementations of the interfacde defined in
Module for testing implementations of the interface defined in
[LearnAPI.jl](https://juliaai.github.io/LearnAPI.jl/dev/).

If your package defines an object `learner` implementing the interface, then put something
Expand Down Expand Up @@ -46,12 +46,14 @@ using LinearAlgebra
using Random
using Statistics
using UnPack
import LearnDataFrontEnds

include("tools.jl")
include("logging.jl")
include("testapi.jl")
include("learners/static_algorithms.jl")
include("learners/regression.jl")
include("learners/classification.jl")
include("learners/ensembling.jl")
# next learner excluded because of heavy dependencies:
# include("learners/gradient_descent.jl")
Expand Down
118 changes: 118 additions & 0 deletions src/learners/classification.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# This file defines `ConstantClassifier()`

using LearnAPI
import LearnDataFrontEnds as FrontEnds
import MLCore
import CategoricalArrays
import CategoricalDistributions
import CategoricalDistributions.OrderedCollections.OrderedDict
import CategoricalDistributions.Distributions.StatsBase.proportionmap

# The implementation of a constant classifier below is not the simplest, but it
# demonstrates some patterns that apply more generally in classification, including
# inclusion of the canned data front end, `Sage`.

"""
    ConstantClassifier()

Instantiate a constant (dummy) classifier. Can predict `Point` or `Distribution` targets.

"""
struct ConstantClassifier end # no fields: the learner has no hyperparameters

# Output of `fit(::ConstantClassifier, ...)`. Fields are left abstract for simplicity.
struct ConstantClassifierFitted
    learner::ConstantClassifier
    probabilities         # class proportions in the training target, ordered by code
    names::Vector{Symbol} # feature names, as reported by `LearnAPI.feature_names`
    classes_seen          # classes appearing in the training target
    codes_seen            # sorted integer codes appearing in the training target
    decoder               # maps an integer code back to the corresponding class
end

LearnAPI.learner(model::ConstantClassifierFitted) = model.learner

# Attach the canned `Sage` data front end, so that `obs` returns objects of type
# `FrontEnds.Obs`:
const front_end = FrontEnds.Sage(code_type=:small)
function LearnAPI.obs(learner::ConstantClassifier, data)
    return FrontEnds.fitobs(learner, data, front_end)
end
function LearnAPI.obs(model::ConstantClassifierFitted, data)
    return obs(model, data, front_end)
end

# training-data deconstructors, delegated to the data front end:
function LearnAPI.features(learner::ConstantClassifier, data)
    return LearnAPI.features(learner, data, front_end)
end
function LearnAPI.target(learner::ConstantClassifier, data)
    return LearnAPI.target(learner, data, front_end)
end

# Record the empirical class distribution of the training target. The `verbosity`
# keyword is accepted (per the LearnAPI contract) but not used here.
function LearnAPI.fit(learner::ConstantClassifier, observations::FrontEnds.Obs; verbosity=1)
    codes = observations.target # target as integer "codes"
    distinct_codes = sort(unique(codes))

    # class proportions keyed by code, sorted by code:
    code_to_proportion = OrderedDict(proportionmap(codes))
    sort!(code_to_proportion)
    probs = collect(values(code_to_proportion))

    return ConstantClassifierFitted(
        learner,
        probs,
        observations.names,
        observations.classes_seen,
        distinct_codes,
        observations.decoder,
    )
end
# slurp user-provided data through the front end:
function LearnAPI.fit(learner::ConstantClassifier, data; kwargs...)
    return fit(learner, obs(learner, data); kwargs...)
end

# point predictions: every observation gets the modal (most frequent) training class
function LearnAPI.predict(
    model::ConstantClassifierFitted,
    ::Point,
    observations::FrontEnds.Obs,
)
    nobs = MLCore.numobs(observations)
    modal_code = model.codes_seen[argmax(model.probabilities)]
    codes = fill(modal_code, nobs)
    return model.decoder.(codes)
end
function LearnAPI.predict(model::ConstantClassifierFitted, ::Point, data)
    return predict(model, Point(), obs(model, data))
end

# probabilistic predictions: every observation gets the empirical training distribution
function LearnAPI.predict(
    model::ConstantClassifierFitted,
    ::Distribution,
    observations::FrontEnds.Obs,
)
    nobs = MLCore.numobs(observations)
    p = model.probabilities
    # matrix with one (identical) row of probabilities per observation:
    prob_rows = permutedims(reshape(repeat(p, nobs), (length(p), nobs)))
    return CategoricalDistributions.UnivariateFinite(model.classes_seen, prob_rows)
end
function LearnAPI.predict(model::ConstantClassifierFitted, ::Distribution, data)
    return predict(model, Distribution(), obs(model, data))
end

# accessor function exposing the training feature names:
function LearnAPI.feature_names(model::ConstantClassifierFitted)
    return model.names
end

# Declare the LearnAPI.jl traits of `ConstantClassifier`: its constructor, the kinds
# of target proxy it can predict, and the LearnAPI functions it implements.
@trait(
    ConstantClassifier,
    constructor = ConstantClassifier,
    kinds_of_proxy = (Point(),Distribution()),
    tags = ("classification",),
    functions = (
        :(LearnAPI.fit),
        :(LearnAPI.learner),
        :(LearnAPI.clone),
        :(LearnAPI.strip),
        :(LearnAPI.obs),
        :(LearnAPI.features),
        :(LearnAPI.target),
        :(LearnAPI.predict),
        :(LearnAPI.feature_names),
    )
)

# NOTE(review): `true` is the value returned when this file is `include`d —
# presumably checked by the surrounding package/tests; confirm.
true
36 changes: 29 additions & 7 deletions src/learners/dimension_reduction.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# This file defines `TruncatedSVD(; codim=1)`

using LearnAPI
using LinearAlgebra
using LinearAlgebra
import LearnDataFrontEnds as FrontEnds


# # DIMENSION REDUCTION USING TRUNCATED SVD DECOMPOSITION

# Recall that truncated SVD reduction is the same as PCA reduction, but without
# centering. We suppose observations are presented as the columns of a `Real` matrix.
# centering.

# Some struct fields are left abstract for simplicity.

Expand All @@ -23,6 +24,11 @@ end
Instantiate a truncated singular value decomposition algorithm for reducing the dimension
of observations by `codim`.

Data can be provided to `fit` or `transform` in any form supported by the `Tarragon` data
front end at LearnDataFrontEnds.jl. However, the outputs of `transform` and
`inverse_transform` are always matrices.


```julia
learner = Truncated()
X = rand(3, 100) # 100 observations in 3-space
Expand All @@ -49,10 +55,21 @@ end

LearnAPI.learner(model::TruncatedSVDFitted) = model.learner

function LearnAPI.fit(learner::TruncatedSVD, X; verbosity=1)
# add a canned data front end; `obs` will return objects of type `FrontEnds.Obs`:
LearnAPI.obs(learner::TruncatedSVD, data) =
FrontEnds.fitobs(learner, data, FrontEnds.Tarragon())
LearnAPI.obs(model::TruncatedSVDFitted, data) =
obs(model, data, FrontEnds.Tarragon())

# training data deconstructor:
LearnAPI.features(learner::TruncatedSVD, data) =
LearnAPI.features(learner, data, FrontEnds.Tarragon())

function LearnAPI.fit(learner::TruncatedSVD, observations::FrontEnds.Obs; verbosity=1)

# unpack hyperparameters:
codim = learner.codim
X = observations.features
p, n = size(X)
n ≥ p || error("Insufficient number observations. ")
outdim = p - codim
Expand All @@ -70,14 +87,19 @@ function LearnAPI.fit(learner::TruncatedSVD, X; verbosity=1)
return TruncatedSVDFitted(learner, U, Ut, singular_values)

end
LearnAPI.fit(learner::TruncatedSVD, data; kwargs...) =
LearnAPI.fit(learner, LearnAPI.obs(learner, data); kwargs...)

LearnAPI.transform(model::TruncatedSVDFitted, X) = model.Ut*X
LearnAPI.transform(model::TruncatedSVDFitted, observations::FrontEnds.Obs) =
model.Ut*(observations.features)
LearnAPI.transform(model::TruncatedSVDFitted, data) =
LearnAPI.transform(model, obs(model, data))

# convenience fit-transform:
LearnAPI.transform(learner::TruncatedSVD, X; kwargs...) =
transform(fit(learner, X; kwargs...), X)
LearnAPI.transform(learner::TruncatedSVD, data; kwargs...) =
transform(fit(learner, data; kwargs...), data)

LearnAPI.inverse_transform(model::TruncatedSVDFitted, W) = model.U*W
LearnAPI.inverse_transform(model::TruncatedSVDFitted, W::AbstractMatrix) = model.U*W

# accessor function:
function LearnAPI.extras(model::TruncatedSVDFitted)
Expand Down
3 changes: 2 additions & 1 deletion src/learners/ensembling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ LearnAPI.components(model::EnsembleFitted) = [:atom => model.models,]
# - `out_of_sample_losses`

# For simplicity, this implementation is restricted to univariate features. The simplistic
# algorithm is explained in the docstring. of the data presented.
# algorithm is explained in the docstring.


# ## HELPERS
Expand Down Expand Up @@ -276,6 +276,7 @@ function update!(
stump = Stump(ξ, left, right)
push!(forest, stump)
new_predictions = _predict(stump, x)

# efficient in-place update of `predictions`:
predictions .= (k*predictions .+ new_predictions)/(k + 1)
push!(training_losses, (predictions[training_indices] .- ytrain).^2 |> sum)
Expand Down
Loading
Loading