@@ -248,12 +248,43 @@ end
248
248
@test scitype (bag_of_words) == Multiset{Textual}
249
249
bag_of_tagged_words = Dict (tagged_word => 5 )
250
250
@test scitype (bag_of_tagged_words) == Multiset{Annotated{Textual}}
251
- @test scitype (Document (" kadsfkj" , " My Document" )) == Unknown
252
- @test scitype (Document ([tagged_word, tagged_word2], " My Other Doc" )) ==
251
+ @test scitype (Document (" My Document" , " kadsfkj" )) == Unknown
252
+ @test scitype (Document ([tagged_word, tagged_word2])) ==
253
+ Annotated{AbstractVector{Annotated{Textual}}}
254
+ @test scitype (Document (" My Other Doc" , [tagged_word, tagged_word2])) ==
253
255
Annotated{AbstractVector{Annotated{Textual}}}
254
256
nested_tokens = [[" dog" , " cat" ], [" bird" , " cat" ]]
255
- @test scitype (Document (nested_tokens), " Essay Number 1" ) ==
256
- Annotated{AbstractVector{AbstractVector{Textual}}}
257
+ @test scitype (Document (" Essay Number 1" , nested_tokens)) ==
258
+ Annotated{AbstractVector{AbstractVector{Textual}}}
259
+
260
+ @test scitype (Dict ((" cat" , " in" ) => 3 )) == Multiset{Tuple{Textual,Textual}}
261
+ bag_of_words = Dict (" cat in" => 1 ,
262
+ " the hat" => 1 ,
263
+ " the" => 2 ,
264
+ " cat" => 1 ,
265
+ " hat" => 1 ,
266
+ " in the" => 1 ,
267
+ " in" => 1 ,
268
+ " the cat" => 1 )
269
+ bag_of_ngrams =
270
+ Dict (Tuple (String .(split (k))) => v for (k, v) in bag_of_words)
271
+ # Dict{Tuple{String, Vararg{String, N} where N}, Int64} with 8 entries:
272
+ # ("cat",) => 1
273
+ # ("cat", "in") => 1
274
+ # ("in",) => 1
275
+ # ("the", "hat") => 1
276
+ # ("the",) => 2
277
+ # ("hat",) => 1
278
+ # ("in", "the") => 1
279
+ # ("the", "cat") => 1
280
+ @test scitype (bag_of_ngrams) == Multiset{NTuple{<: Any ,Textual}}
281
+
282
+ @test scitype (Dict ((tagged_word, tagged_word2) => 3 )) ==
283
+ Multiset{Tuple{Annotated{Textual},Annotated{Textual}}}
284
+ bag_of_ngrams = Dict ((tagged_word, tagged_word2) => 3 ,
285
+ (tagged_word,) => 7 )
286
+ @test scitype (bag_of_ngrams) == Multiset{NTuple{<: Any ,Annotated{Textual}}}
287
+
257
288
end
258
289
259
290
@testset " Autotype+tight" begin
0 commit comments