Skip to content

Commit 000a4da

Browse files
committed
add option for uninomials fuzzy matching (close #105)
1 parent 43067ef commit 000a4da

File tree

14 files changed

+173
-859
lines changed

14 files changed

+173
-859
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## [v1.1.1] - 2023-06-22 Thu
4+
5+
- Add [#105]: option for uninomials fuzzy-matching.
6+
37
## [v1.1.0] - 2023-05-12 Fri
48

59
- Add [#104]: name-string widget.
@@ -207,6 +211,8 @@ This document follows [changelog guidelines]
207211

208212
<!-- VERSIONS -->
209213

214+
[v1.1.1]: https://github.com/gnames/gnverifier/compare/v1.1.0...v1.1.1
215+
[v1.1.0]: https://github.com/gnames/gnverifier/compare/v1.0.3...v1.1.0
210216
[v1.0.3]: https://github.com/gnames/gnverifier/compare/v1.0.2...v1.0.3
211217
[v1.0.2]: https://github.com/gnames/gnverifier/compare/v1.0.1...v1.0.2
212218
[v1.0.1]: https://github.com/gnames/gnverifier/compare/v1.0.0...v1.0.1

README.md

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,33 @@ search feature.
1313
* [Citing](#citing)
1414
* [Features](#features)
1515
* [Installation](#installation)
16-
* [Using Homebrew on Mac OS X, Linux, and Linux on Windows ([WSL2])](#using-homebrew-on-mac-os-x-linux-and-linux-on-windows-wsl2)
17-
* [MS Windows](#ms-windows)
18-
* [Linux and Mac (without Homebrew)](#linux-and-mac-without-homebrew)
19-
* [Compile from source](#compile-from-source)
16+
* [Using Homebrew on Mac OS X, Linux, and Linux on Windows ([WSL2])](#using-homebrew-on-mac-os-x-linux-and-linux-on-windows-wsl2)
17+
* [MS Windows](#ms-windows)
18+
* [Linux and Mac (without Homebrew)](#linux-and-mac-without-homebrew)
19+
* [Compile from source](#compile-from-source)
2020
* [Usage](#usage)
21-
* [As a web service](#as-a-web-service)
22-
* [As a RESTful API](#as-a-restful-api)
23-
* [One name-string](#one-name-string)
24-
* [Many name-strings in a file](#many-name-strings-in-a-file)
25-
* [Advanced search](#advanced-search)
26-
* [Options and flags](#options-and-flags)
27-
* [help](#help)
28-
* [version](#version)
29-
* [port](#port)
30-
* [all_matches](#all_matches)
31-
* [capitalize](#capitalize)
32-
* [species group](#species-group)
33-
* [format](#format)
34-
* [jobs](#jobs)
35-
* [quiet](#quiet)
36-
* [sources](#sources)
37-
* [web-logs](#web-logs)
38-
* [nsqd-tcp](#nsqd-tcp)
39-
* [Configuration file](#configuration-file)
40-
* [Advanced Search Query Language](#advanced-search-query-language)
41-
* [Examples of searches](#examples-of-searches)
21+
* [As a web service](#as-a-web-service)
22+
* [As a RESTful API](#as-a-restful-api)
23+
* [One name-string](#one-name-string)
24+
* [Many name-strings in a file](#many-name-strings-in-a-file)
25+
* [Advanced search](#advanced-search)
26+
* [Options and flags](#options-and-flags)
27+
* [help](#help)
28+
* [version](#version)
29+
* [port](#port)
30+
* [all_matches](#all_matches)
31+
* [capitalize](#capitalize)
32+
* [species group](#species-group)
33+
* [fuzzy-match of uninomial names](#fuzzy-match-of-uninomial-names)
34+
* [format](#format)
35+
* [jobs](#jobs)
36+
* [quiet](#quiet)
37+
* [sources](#sources)
38+
* [web-logs](#web-logs)
39+
* [nsqd-tcp](#nsqd-tcp)
40+
* [Configuration file](#configuration-file)
41+
* [Advanced Search Query Language](#advanced-search-query-language)
42+
* [Examples of searches](#examples-of-searches)
4243
* [Copyright](#copyright)
4344

4445
<!-- vim-markdown-toc -->
@@ -273,6 +274,17 @@ gnverifier -g "Bubo bubo"
273274
gnverifier --species_group "Bubo bubo"
274275
```
275276

277+
#### fuzzy-match of uninomial names
278+
279+
When the `fuzzy_uninomial` flag is on, uninomials are allowed to go through
280+
fuzzy matching, if needed. Normally this flag is off because fuzzy-matched
281+
uninomials create a significant number of false positives.
282+
283+
```bash
284+
gnverifier -z "Pomatmus"
285+
gnverifier --fuzzy_uninomial "Pomatmus"
286+
```
287+
276288
#### format
277289

278290
Allows picking a format for output. Supported formats are

cmd/gnverifier.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@
3232
#
3333
# WithSpeciesGroup: false
3434

35+
# WithUninomialFuzzyMatch is a boolean flag. If it is true,
36+
# uninomial names are verified using fuzzy matching. Beware of
37+
# a significant number of false positives when this flag is used.
38+
#
39+
# WithUninomialFuzzyMatch: false
40+
3541
# VerifierURL is a URL to gnames REST API
3642
#
3743
# VerifierURL: "https://verifier.globalnames.org/api/v1/"

cmd/root.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,18 @@ var (
4242
// cfgData's purpose is to achieve automatic import of data from the
4343
// configuration file, if it exists.
4444
type cfgData struct {
45-
DataSources []int
46-
Format string
47-
Jobs int
48-
NsqdContainsFilter string
49-
NsqdRegexFilter string
50-
NsqdTCPAddress string
51-
VerifierURL string
52-
WithAllMatches bool
53-
WithCapitalization bool
54-
WithSpeciesGroup bool
55-
WithWebLogs bool
45+
DataSources []int
46+
Format string
47+
Jobs int
48+
NsqdContainsFilter string
49+
NsqdRegexFilter string
50+
NsqdTCPAddress string
51+
VerifierURL string
52+
WithAllMatches bool
53+
WithCapitalization bool
54+
WithSpeciesGroup bool
55+
WithUninomialFuzzyMatch bool
56+
WithWebLogs bool
5657
}
5758

5859
// rootCmd represents the base command when called without any subcommands
@@ -96,6 +97,11 @@ https://github.com/gnames/gnverifier
9697
opts = append(opts, config.OptWithSpeciesGroup(true))
9798
}
9899

100+
fuzzyUni, _ := cmd.Flags().GetBool("fuzzy_uninomial")
101+
if fuzzyUni {
102+
opts = append(opts, config.OptWithUninomialFuzzyMatch(true))
103+
}
104+
99105
formatString, _ := cmd.Flags().GetString("format")
100106
frmt, _ := gnfmt.NewFormat(formatString)
101107
if frmt == gnfmt.FormatNone {
@@ -192,6 +198,8 @@ func init() {
192198
rootCmd.Flags().IntP("port", "p", 0, "Port to run web GUI.")
193199
rootCmd.Flags().BoolP("all_matches", "M", false, "return all matched results per source, not just the best one.")
194200
rootCmd.Flags().BoolP("species_group", "g", false, "searching for species names also searches their species groups.")
201+
rootCmd.Flags().BoolP("fuzzy_uninomial", "z", false,
202+
"allows fuzzy matching for uninomial names.")
195203
rootCmd.Flags().BoolP("quiet", "q", false, "do not show progress")
196204
rootCmd.Flags().StringP("sources", "s", "", `IDs of important data-sources to verify against (ex "1,11").
197205
If sources are set and there are matches to their data,
@@ -246,6 +254,10 @@ func initConfig() {
246254
_ = viper.BindEnv("WithAllMatches", "GNV_WITH_ALL_MATCHES")
247255
_ = viper.BindEnv("WithCapitalization", "GNV_WITH_CAPITALIZATION")
248256
_ = viper.BindEnv("WithSpeciesGroup", "GNV_WITH_SPECIES_GROUP")
257+
_ = viper.BindEnv(
258+
"WithUninomialFuzzyMatch",
259+
"GNV_WITH_UNINOMIAL_FUZZY_MATCH",
260+
)
249261
_ = viper.BindEnv("WithWebLogs", "GNV_WITH_WEB_LOGS")
250262

251263
viper.AutomaticEnv() // read in environment variables that match
@@ -304,6 +316,9 @@ func getOpts() {
304316
if cfg.WithSpeciesGroup {
305317
opts = append(opts, config.OptWithSpeciesGroup(true))
306318
}
319+
if cfg.WithUninomialFuzzyMatch {
320+
opts = append(opts, config.OptWithUninomialFuzzyMatch(true))
321+
}
307322
if cfg.WithWebLogs {
308323
opts = append(opts, config.OptWithWebLogs(true))
309324
}

go.mod

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,18 @@ require (
66
github.com/dnaeon/go-vcr v1.2.0
77
github.com/dustin/go-humanize v1.0.1
88
github.com/gnames/gnfmt v0.4.1
9-
github.com/gnames/gnlib v0.15.0
9+
github.com/gnames/gnlib v0.18.0
1010
github.com/gnames/gnquery v0.3.3
1111
github.com/gnames/gnsys v0.2.2
1212
github.com/gnames/gnuuid v0.1.1
1313
github.com/labstack/echo/v4 v4.10.2
1414
github.com/labstack/gommon v0.4.0
15-
github.com/maxbrunsfeld/counterfeiter/v6 v6.6.1
16-
github.com/rs/zerolog v1.29.0
15+
github.com/maxbrunsfeld/counterfeiter/v6 v6.6.2
16+
github.com/rs/zerolog v1.29.1
1717
github.com/sfgrp/lognsq v0.1.1
18-
github.com/spf13/cobra v1.6.1
19-
github.com/spf13/viper v1.15.0
20-
github.com/stretchr/testify v1.8.2
18+
github.com/spf13/cobra v1.7.0
19+
github.com/spf13/viper v1.16.0
20+
github.com/stretchr/testify v1.8.4
2121
gopkg.in/yaml.v2 v2.4.0
2222
)
2323

@@ -29,7 +29,7 @@ require (
2929
github.com/golang/snappy v0.0.3 // indirect
3030
github.com/google/uuid v1.2.0 // indirect
3131
github.com/hashicorp/hcl v1.0.0 // indirect
32-
github.com/inconshreveable/mousetrap v1.0.1 // indirect
32+
github.com/inconshreveable/mousetrap v1.1.0 // indirect
3333
github.com/json-iterator/go v1.1.12 // indirect
3434
github.com/magiconair/properties v1.8.7 // indirect
3535
github.com/mattn/go-colorable v0.1.13 // indirect
@@ -38,25 +38,25 @@ require (
3838
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
3939
github.com/modern-go/reflect2 v1.0.2 // indirect
4040
github.com/nsqio/go-nsq v1.1.0 // indirect
41-
github.com/pelletier/go-toml/v2 v2.0.6 // indirect
41+
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
4242
github.com/pmezard/go-difflib v1.0.0 // indirect
4343
github.com/pointlander/compress v1.1.1-0.20190518213731-ff44bd196cc3 // indirect
4444
github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4 // indirect
4545
github.com/pointlander/peg v1.0.1 // indirect
46-
github.com/spf13/afero v1.9.3 // indirect
47-
github.com/spf13/cast v1.5.0 // indirect
46+
github.com/spf13/afero v1.9.5 // indirect
47+
github.com/spf13/cast v1.5.1 // indirect
4848
github.com/spf13/jwalterweatherman v1.1.0 // indirect
4949
github.com/spf13/pflag v1.0.5 // indirect
5050
github.com/subosito/gotenv v1.4.2 // indirect
5151
github.com/valyala/bytebufferpool v1.0.0 // indirect
5252
github.com/valyala/fasttemplate v1.2.2 // indirect
53-
golang.org/x/crypto v0.6.0 // indirect
54-
golang.org/x/mod v0.7.0 // indirect
55-
golang.org/x/net v0.7.0 // indirect
56-
golang.org/x/sys v0.5.0 // indirect
57-
golang.org/x/text v0.7.0 // indirect
53+
golang.org/x/crypto v0.10.0 // indirect
54+
golang.org/x/mod v0.11.0 // indirect
55+
golang.org/x/net v0.11.0 // indirect
56+
golang.org/x/sys v0.9.0 // indirect
57+
golang.org/x/text v0.10.0 // indirect
5858
golang.org/x/time v0.3.0 // indirect
59-
golang.org/x/tools v0.5.0 // indirect
59+
golang.org/x/tools v0.9.3 // indirect
6060
gopkg.in/ini.v1 v1.67.0 // indirect
6161
gopkg.in/yaml.v3 v3.0.1 // indirect
6262
)

0 commit comments

Comments
 (0)