Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #161: Allow coercion with string names #162

Merged
merged 1 commit into from
Sep 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions src/coerce.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Admissible type for a column name used as a key in coercion
# specifications: either a `Symbol` or a string.
const ColKey = Union{Symbol,AbstractString}

"""
coerce(A, specs...; tight=false, verbosity=1)

Expand All @@ -16,7 +18,9 @@ both the `OldScitype` and `Union{Missing,OldScitype}` cases):

(iii) a dictionary of scientific types keyed on column names:

coerce(X, d::AbstractDict{Symbol, <:Type}; verbosity=1)
coerce(X, d::AbstractDict{<:ColKey, <:Type}; verbosity=1)

where `ColKey = Union{Symbol,AbstractString}`.

### Examples

Expand Down Expand Up @@ -50,7 +54,7 @@ coerce(::Val{:other}, X, a...; kw...) =

# Throw an `ArgumentError` explaining the form required of a dictionary that
# specifies scitype conversions (column-name keys, scientific-type values).
# Always throws; never returns.
_bad_dictionary() = throw(ArgumentError(
    "A dictionary specifying a scitype conversion "*
    "must have type `AbstractDict{<:ColKey, <:Type}`, "*
    "where `ColKey = Union{Symbol,AbstractString}`. Its keys must "*
    "be column names and its values be scientific types. "*
    "E.g., `Dict(:cats=>Continuous, :dogs=>Textual)`."))
coerce(::Val{:table}, X, types_dict::AbstractDict; kw...) =
Expand All @@ -68,7 +72,7 @@ coerce(::Val{:table}, X, specs...; kw...) = _bad_specs()

function coerce(::Val{:table},
X,
types_dict::AbstractDict{Symbol, <:Type};
types_dict::AbstractDict{<:ColKey, <:Type};
kw...)
isempty(types_dict) && return X
names = schema(X).names
Expand Down Expand Up @@ -103,7 +107,7 @@ end
# symbol=>type, string=>type and type=>type pairs can be specified in place
# of a dictionary:

# Normalise a single `name => scitype` specification, where `name` is a
# `Symbol` or a string, to a one-element vector holding a `Symbol`-keyed pair.
# `X` is unused here; it is accepted so that this method shares a call
# signature with the `type => type` method. No separate `Symbol`-only method
# is needed, because `Symbol(s)` returns `s` itself when `s` is already a
# `Symbol`.
feature_scitype_pairs(p::Pair{<:ColKey,<:Type}, X) = [Symbol(first(p)) => last(p), ]
function feature_scitype_pairs(p::Pair{<:Type,<:Type}, X)
from_scitype = first(p)
to_scitype = last(p)
Expand All @@ -121,7 +125,7 @@ for c in (:coerce, :coerce!)
ex = quote
function $c(::Val{:table},
X,
mixed_pairs::Pair{<:Union{Symbol,<:Type},<:Type}...;
mixed_pairs::Pair{<:Union{<:ColKey,<:Type},<:Type}...;
kw...)
components = map(p -> feature_scitype_pairs(p, X), mixed_pairs)
pairs = vcat(components...)
Expand Down Expand Up @@ -170,7 +174,7 @@ coerce!(::Val{:table}, X, specs...; kw...) = _bad_specs()

function coerce!(::Val{:table},
X,
types_dict::AbstractDict{Symbol, <:Type};
types_dict::AbstractDict{<:ColKey, <:Type};
kw...)
# DataFrame --> coerce_df!
if is_type(X, :DataFrames, :DataFrame)
Expand All @@ -189,7 +193,7 @@ end

In-place coercion for a dataframe. (Unexported method)
"""
function coerce_df!(df, tdict::AbstractDict{Symbol, <:Type}; kw...)
function coerce_df!(df, tdict::AbstractDict{<:ColKey, <:Type}; kw...)
names = schema(df).names
for name in names
name in keys(tdict) || continue
Expand Down
5 changes: 5 additions & 0 deletions test/basic_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ end
@test scitype_union(yc) == Union{Missing,OrderedFactor{3}}
@test scitype_union(y) == Union{Missing,Multiclass{3}}

# tests fix for issue https://github.com/JuliaAI/ScientificTypes.jl/issues/161
X = (x=10:10:44, y=1:4, z=collect("abcd"))
Xc = coerce(X, :x => Continuous, "y" => Continuous)
@test scitype_union(Xc.x) === Continuous
@test scitype_union(Xc.y) === Continuous
end

@testset "coerce arrays" begin
Expand Down