Skip to content

Commit db89187

Browse files
feat(brill): train and use Brill tagger (#1344)
Co-authored-by: hippietrail <[email protected]>
1 parent e3e5735 commit db89187

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+51863
-16125
lines changed

Cargo.lock

Lines changed: 107 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[workspace]
2-
members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst" , "harper-stats"]
2+
members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst" , "harper-stats", "harper-pos-utils", "harper-brill"]
33
resolver = "2"
44

55
# Comment out the below lines if you plan to use a debugger.

harper-brill/Cargo.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[package]
2+
name = "harper-brill"
3+
version = "0.42.0"
4+
edition = "2024"
5+
6+
[dependencies]
7+
harper-pos-utils = { path = "../harper-pos-utils/", version = "0.42.0" }
8+
lazy_static = "1.5.0"
9+
rs-conllu = "0.3.0"
10+
serde = "1.0.219"
11+
serde_json = "1.0.140"
12+
13+
[build-dependencies]
14+
rs-conllu = "0.3.0"
15+
serde = "1.0.219"
16+
serde_json = "1.0.140"

harper-brill/src/lib.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
use lazy_static::lazy_static;
2+
use std::sync::Arc;
3+
4+
pub use harper_pos_utils::{BrillChunker, BrillTagger, Chunker, FreqDict, Tagger, UPOS};
5+
6+
/// Text of `../trained_tagger_model.json`, embedded into the crate at compile time by `include_str!`.
const BRILL_TAGGER_SOURCE: &str = include_str!("../trained_tagger_model.json");
7+
8+
lazy_static! {
    /// Shared tagger instance, built by `uncached_brill_tagger` the first time
    /// this static is dereferenced and reused afterwards.
    static ref BRILL_TAGGER: Arc<BrillTagger<FreqDict>> = {
        let tagger = uncached_brill_tagger();
        Arc::new(tagger)
    };
}
11+
12+
fn uncached_brill_tagger() -> BrillTagger<FreqDict> {
13+
serde_json::from_str(BRILL_TAGGER_SOURCE).unwrap()
14+
}
15+
16+
pub fn brill_tagger() -> Arc<BrillTagger<FreqDict>> {
17+
(*BRILL_TAGGER).clone()
18+
}
19+
20+
/// Text of `../trained_chunker_model.json`, embedded into the crate at compile time by `include_str!`.
const BRILL_CHUNKER_SOURCE: &str = include_str!("../trained_chunker_model.json");
21+
22+
lazy_static! {
    /// Shared chunker instance, built by `uncached_brill_chunker` the first
    /// time this static is dereferenced and reused afterwards.
    static ref BRILL_CHUNKER: Arc<BrillChunker> = {
        let chunker = uncached_brill_chunker();
        Arc::new(chunker)
    };
}
25+
26+
fn uncached_brill_chunker() -> BrillChunker {
27+
serde_json::from_str(BRILL_CHUNKER_SOURCE).unwrap()
28+
}
29+
30+
pub fn brill_chunker() -> Arc<BrillChunker> {
31+
(*BRILL_CHUNKER).clone()
32+
}

0 commit comments

Comments
 (0)