Skip to content

Commit b2124a1

Browse files
authored
Merge pull request #170 from JuliaAI/dev
For a 2.3.2 release
2 parents 1ac911a + fb66054 commit b2124a1

File tree

6 files changed

+10
-91
lines changed

6 files changed

+10
-91
lines changed

Project.toml

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
name = "ScientificTypes"
22
uuid = "321657f4-b219-11e9-178b-2701a2544e81"
33
authors = ["Anthony D. Blaom <[email protected]>"]
4-
version = "2.3.1"
4+
version = "2.3.2"
55

66
[deps]
77
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
88
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
99
CorpusLoaders = "214a0ac2-f95b-54f7-a80b-442ed9c2c9e8"
1010
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
1111
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
12-
PersistenceDiagramsBase = "b1ad91c1-539c-4ace-90bd-ea06abc420fa"
1312
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
1413
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1514
ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
@@ -19,9 +18,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
1918
[compat]
2019
CategoricalArrays = "0.8, 0.9, 0.10"
2120
ColorTypes = "0.9, 0.10, 0.11"
22-
CorpusLoaders = "0.3.2"
2321
Distributions = "0.25.1"
24-
PersistenceDiagramsBase = "0.1"
2522
PrettyTables = "1"
2623
Reexport = "1.2"
2724
ScientificTypesBase = "2.2"

src/ScientificTypes.jl

-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ using Reexport
66
using Tables
77
using CategoricalArrays
88
using ColorTypes
9-
using PersistenceDiagramsBase
109
using CorpusLoaders
1110
using PrettyTables
1211
using Dates

src/convention/scitype.jl

+2-32
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ ColorImage{size(img)...}
1616

1717
# Persistence diagrams
1818

19-
ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram
19+
# ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram
2020

2121
# CategoricalArray scitype
2222

@@ -66,34 +66,6 @@ ST.scitype(::Distributions.Sampleable{F,S}) where {F,S} =
6666
ST.scitype(::Distributions.Distribution{F,S}) where {F,S} =
6767
Density{space_scitype(F,S)}
6868

69-
# Text analysis - EXPERIMENTAL
70-
71-
# This would be less of a hack if some of #155 were adopted.
72-
73-
type2scitype(T::Type) = ST.Scitype(T, DefaultConvention())
74-
type2scitype(::Type{<:AbstractVector{T}}) where T =
75-
AbstractVector{type2scitype(T)}
76-
type2scitype(::NTuple{N,T}) where {N,T} = NTuple{type2scitype{T}}
77-
const PlainNGram{N} = NTuple{N,<:AbstractString}
78-
const TaggedNGram{N} = NTuple{N,<:CorpusLoaders.TaggedWord}
79-
ST.scitype(::TaggedWord, ::DefaultConvention) = Annotated{Textual}
80-
ST.scitype(::Document{<:AbstractVector{T}}, ::DefaultConvention) where T =
81-
Annotated{AbstractVector{type2scitype(T)}}
82-
ST.scitype(::AbstractDict{<:AbstractString,<:Integer},
83-
::DefaultConvention) = Multiset{Textual}
84-
ST.scitype(::AbstractDict{<:TaggedWord,<:Integer},
85-
::DefaultConvention) = Multiset{Annotated{Textual}}
86-
ST.scitype(::AbstractDict{<:Union{TaggedWord,AbstractString},<:Integer},
87-
::DefaultConvention) =
88-
Multiset{Union{Textual,Annotated{Textual}}}
89-
ST.scitype(::AbstractDict{<:PlainNGram{N}}) where N =
90-
Multiset{NTuple{N,Textual}}
91-
ST.scitype(::AbstractDict{<:TaggedNGram{N}}) where N =
92-
Multiset{NTuple{N,Annotated{Textual}}}
93-
ST.scitype(::AbstractDict{<:PlainNGram}) =
94-
Multiset{NTuple{<:Any,Textual}}
95-
ST.scitype(::AbstractDict{<:TaggedNGram}) =
96-
Multiset{NTuple{<:Any,Annotated{Textual}}}
9769

9870
# Scitype for fast array broadcasting
9971

@@ -104,6 +76,4 @@ ST.Scitype(::Type{<:TimeType}, ::DefaultConvention) = ScientificTimeTy
10476
ST.Scitype(::Type{<:Date}, ::DefaultConvention) = ScientificDate
10577
ST.Scitype(::Type{<:Time}, ::DefaultConvention) = ScientificTime
10678
ST.Scitype(::Type{<:DateTime}, ::DefaultConvention) = ScientificDateTime
107-
ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram
108-
ST.Scitype(::Type{<:TaggedWord}, ::DefaultConvention) =
109-
Annotated{Textual}
79+
# ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram

test/basic_tests.jl

+6-6
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,13 @@ end
8686
@test scitype(gray_image) == GrayImage{10,20}
8787
end
8888

89-
@testset "PersistenceDiagrams" begin
90-
diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0)
91-
@test scitype(diagram) == PersistenceDiagram
89+
# @testset "PersistenceDiagrams" begin
90+
# diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0)
91+
# @test scitype(diagram) == PersistenceDiagram
9292

93-
diagrams = [diagram, diagram, diagram]
94-
@test scitype(diagrams) == Vec{PersistenceDiagram}
95-
end
93+
# diagrams = [diagram, diagram, diagram]
94+
# @test scitype(diagrams) == Vec{PersistenceDiagram}
95+
# end
9696

9797
@testset "temporal types" begin
9898
d = Date(2020, 4, 21)

test/runtests.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
using Test, ScientificTypes, ScientificTypesBase, Random
22
using Tables, CategoricalArrays, DataFrames
3-
using ColorTypes, PersistenceDiagramsBase, CorpusLoaders
3+
using ColorTypes, CorpusLoaders
44
using Dates
55
# using CSV # dropped until julia release new LTS as issue for 1.0
66
import Distributions

test/scitypes.jl

-47
Original file line numberDiff line numberDiff line change
@@ -240,53 +240,6 @@ end
240240
@test scitype(FooSampleable()) == Sampleable{Count}
241241
end
242242

243-
@testset "text analysis" begin
244-
tagged_word = CorpusLoaders.PosTaggedWord("NN", "wheelbarrow")
245-
tagged_word2 = CorpusLoaders.PosTaggedWord("NN", "soil")
246-
@test scitype(tagged_word) == Annotated{Textual}
247-
bag_of_words = Dict("cat"=>1, "dog"=>3)
248-
@test scitype(bag_of_words) == Multiset{Textual}
249-
bag_of_tagged_words = Dict(tagged_word => 5)
250-
@test scitype(bag_of_tagged_words) == Multiset{Annotated{Textual}}
251-
@test scitype(Document("My Document", "kadsfkj")) == Unknown
252-
@test scitype(Document([tagged_word, tagged_word2])) ==
253-
Annotated{AbstractVector{Annotated{Textual}}}
254-
@test scitype(Document("My Other Doc", [tagged_word, tagged_word2])) ==
255-
Annotated{AbstractVector{Annotated{Textual}}}
256-
nested_tokens = [["dog", "cat"], ["bird", "cat"]]
257-
@test scitype(Document("Essay Number 1", nested_tokens)) ==
258-
Annotated{AbstractVector{AbstractVector{Textual}}}
259-
260-
@test scitype(Dict(("cat", "in") => 3)) == Multiset{Tuple{Textual,Textual}}
261-
bag_of_words = Dict("cat in" => 1,
262-
"the hat" => 1,
263-
"the" => 2,
264-
"cat" => 1,
265-
"hat" => 1,
266-
"in the" => 1,
267-
"in" => 1,
268-
"the cat" => 1)
269-
bag_of_ngrams =
270-
Dict(Tuple(String.(split(k))) => v for (k, v) in bag_of_words)
271-
# Dict{Tuple{String, Vararg{String, N} where N}, Int64} with 8 entries:
272-
# ("cat",) => 1
273-
# ("cat", "in") => 1
274-
# ("in",) => 1
275-
# ("the", "hat") => 1
276-
# ("the",) => 2
277-
# ("hat",) => 1
278-
# ("in", "the") => 1
279-
# ("the", "cat") => 1
280-
@test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Textual}}
281-
282-
@test scitype(Dict((tagged_word, tagged_word2) => 3)) ==
283-
Multiset{Tuple{Annotated{Textual},Annotated{Textual}}}
284-
bag_of_ngrams = Dict((tagged_word, tagged_word2) => 3,
285-
(tagged_word,) => 7)
286-
@test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Annotated{Textual}}}
287-
288-
end
289-
290243
@testset "Autotype+tight" begin
291244
x = [1,2,3,missing];
292245
x = x[1:3]

0 commit comments

Comments
 (0)