Skip to content

Commit 37414fc

Browse files
committed
Merge branch 'master' of github.com:gaborcsardi/franc
2 parents 60dfa66 + 3913bf2 commit 37414fc

File tree

6 files changed

+48
-25
lines changed

6 files changed

+48
-25
lines changed

inst/README.Rmd

+7-6
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ knitr::opts_chunk$set(
1010

1111
> Detect the Language of Text
1212
13-
[![Linux Build Status](https://travis-ci.org/gaborcsardi/franc.svg?branch=master)](https://travis-ci.org/gaborcsardi/franc)
14-
[![Windows Build status](https://ci.appveyor.com/api/projects/status/github/gaborcsardi/franc?svg=true)](https://ci.appveyor.com/project/gaborcsardi/franc)
13+
[![Linux Build Status](https://travis-ci.org/MangoTheCat/franc.svg?branch=master)](https://travis-ci.org/MangoTheCat/franc)
14+
[![Windows Build
15+
status](https://ci.appveyor.com/api/projects/status/github/mangothecat/franc?svg=true)](https://ci.appveyor.com/project/gaborcsardi/franc)
1516
[![](http://www.r-pkg.org/badges/version/franc)](http://www.r-pkg.org/pkg/franc)
1617
[![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/franc)](http://www.r-pkg.org/pkg/franc)
1718

@@ -23,7 +24,7 @@ https://github.com/wooorm/franc.
2324
## Installation
2425

2526
```{r eval = FALSE}
26-
devtools::install_github("gaborcsardi/franc")
27+
devtools::install_github("mangothecat/franc")
2728
```
2829

2930
## Usage
@@ -62,7 +63,7 @@ head(franc_all("O Brasil caiu 26 posições",
6263

6364
The R version of franc supports 310 languages. By default only the
6465
languages with more than 1 million speakers are used; this is 175
65-
languages. The \code{min_speakers} argument can relax this, and allows
66+
languages. The `min_speakers` argument can relax this, and allows
6667
using more languages:
6768

6869
```{r}
@@ -72,5 +73,5 @@ head(franc_all("O Brasil caiu 26 posições", min_speakers = 0))
7273

7374
## License
7475

75-
MIT © Gábor Csárdi, Titus Wormer, Maciej Ceglowski, Jacob R. Rideout
76-
and Kent S. Johnson.
76+
MIT © [Mango Solutions](https://github.com/mangothecat), Titus Wormer,
77+
Maciej Ceglowski, Jacob R. Rideout and Kent S. Johnson.

inst/README.md

+7-6
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55

66
> Detect the Language of Text
77
8-
[![Linux Build Status](https://travis-ci.org/gaborcsardi/franc.svg?branch=master)](https://travis-ci.org/gaborcsardi/franc)
9-
[![Windows Build status](https://ci.appveyor.com/api/projects/status/github/gaborcsardi/franc?svg=true)](https://ci.appveyor.com/project/gaborcsardi/franc)
8+
[![Linux Build Status](https://travis-ci.org/MangoTheCat/franc.svg?branch=master)](https://travis-ci.org/MangoTheCat/franc)
9+
[![Windows Build
10+
status](https://ci.appveyor.com/api/projects/status/github/mangothecat/franc?svg=true)](https://ci.appveyor.com/project/gaborcsardi/franc)
1011
[![](http://www.r-pkg.org/badges/version/franc)](http://www.r-pkg.org/pkg/franc)
1112
[![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/franc)](http://www.r-pkg.org/pkg/franc)
1213

@@ -19,7 +20,7 @@ https://github.com/wooorm/franc.
1920

2021

2122
```r
22-
devtools::install_github("gaborcsardi/franc")
23+
devtools::install_github("mangothecat/franc")
2324
```
2425

2526
## Usage
@@ -125,7 +126,7 @@ head(franc_all("O Brasil caiu 26 posições",
125126

126127
The R version of franc supports 310 languages. By default only the
127128
languages with more than 1 million speakers are used; this is 175
128-
languages. The \code{min_speakers} argument can relax this, and allows
129+
languages. The `min_speakers` argument can relax this, and allows
129130
using more languages:
130131

131132

@@ -159,5 +160,5 @@ head(franc_all("O Brasil caiu 26 posições", min_speakers = 0))
159160

160161
## License
161162

162-
MIT © Gábor Csárdi, Titus Wormer, Maciej Ceglowski, Jacob R. Rideout
163-
and Kent S. Johnson.
163+
MIT © [Mango Solutions](https://github.com/mangothecat), Titus Wormer,
164+
Maciej Ceglowski, Jacob R. Rideout and Kent S. Johnson.

tests/testthat/test-franc.R

+1-3
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,14 @@ context("Language detection")
44
test_that("top language is detected correctly", {
55

66
expect_equal(franc("Alle menslike wesens word vry"), "afr")
7-
expect_equal(franc("এটি একটি ভাষা একক IBM স্ক্রিপ্ট"), "ben")
8-
expect_equal(franc("Alle mennesker er født frie og"), "nno")
97
expect_equal(franc(""), "und")
108
expect_equal(franc("the"), "und")
119
expect_equal(franc("the", min_length = 3), "sco")
1210
})
1311

1412
test_that("language scores are calculated correctly", {
1513

16-
scores <- franc_all('O Brasil caiu 26 posições')
14+
scores <- franc_all('O Brasil caiu 26 posi\u00e7\u00f5es')
1715

1816
expect_equal(
1917
scores[1:12,],

tests/testthat/test-scripts.R

+7-1
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,11 @@ test_that("script detection works", {
55

66
expect_equal(get_top_script(""), NULL)
77
expect_equal(get_top_script("this is in English"), "Latin")
8-
expect_equal(get_top_script("এটি একটি ভাষা একক IBM স্ক্রিপ্ট"), "ben")
8+
9+
ben <- paste0(
10+
"\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ",
11+
"\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ",
12+
"\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F"
13+
)
14+
expect_equal(get_top_script(ben), "ben")
915
})

tests/testthat/test-trigrams.R

+20-8
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,28 @@ test_that("clean_trigrams is case insensitive", {
4545

4646
test_that("clean_trigrams keeps UniCode letters", {
4747

48-
expect_equal(clean_trigrams("এটি একটি ভাষা একক IBM স্ক্রিপ্ট")[[1]],
49-
c(" এট", "এটি", "টি ", "ি এ", " এক", "একট",
50-
"কটি", "টি ", "ি ভ", " ভা", "ভাষ", "াষা",
51-
"ষা ", "া এ", " এক", "একক", "কক ", "ক i",
52-
" ib", "ibm", "bm ", "m স", " স্", "স্ক", "্ক্",
53-
"ক্র", "্রি", "রিপ", "িপ্", "প্ট",
54-
"্ট "))
48+
ben <- paste0(
49+
"\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ",
50+
"\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ",
51+
"\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F"
52+
)
53+
expect_equal(
54+
clean_trigrams(ben)[[1]],
55+
c(" \u098F\u099F", "\u098F\u099F\u09BF", "\u099F\u09BF ",
56+
"\u09BF \u098F", " \u098F\u0995", "\u098F\u0995\u099F",
57+
"\u0995\u099F\u09BF", "\u099F\u09BF ", "\u09BF \u09AD",
58+
" \u09AD\u09BE", "\u09AD\u09BE\u09B7",
59+
"\u09BE\u09B7\u09BE", "\u09B7\u09BE ",
60+
"\u09BE \u098F", " \u098F\u0995", "\u098F\u0995\u0995",
61+
"\u0995\u0995 ", "\u0995 i", " ib", "ibm", "bm ",
62+
"m \u09B8", " \u09B8\u09CD", "\u09B8\u09CD\u0995",
63+
"\u09CD\u0995\u09CD", "\u0995\u09CD\u09B0",
64+
"\u09CD\u09B0\u09BF", "\u09B0\u09BF\u09AA",
65+
"\u09BF\u09AA\u09CD", "\u09AA\u09CD\u099F",
66+
"\u09CD\u099F ")
67+
)
5568
})
5669

57-
5870
test_that("clean_trigrams removes excessive whitespace", {
5971

6072
expect_equal(clean_trigrams(" a ")[[1]], c(" a "))

tests/testthat/test-utils.R

+6-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,10 @@ test_that("match_length works", {
88
expect_equal(match_length("[a-z]", ""), 0)
99
expect_equal(match_length("[a-z]", "123"), 0)
1010

11-
expect_equal(match_length(expressions$ben, "এটি একটি ভাষা একক IBM স্ক্রিপ্ট"), 23)
11+
ben <- paste0(
12+
"\u098F\u099F\u09BF \u098F\u0995\u099F\u09BF ",
13+
"\u09AD\u09BE\u09B7\u09BE \u098F\u0995\u0995 IBM ",
14+
"\u09B8\u09CD\u0995\u09CD\u09B0\u09BF\u09AA\u09CD\u099F"
15+
)
16+
expect_equal(match_length(expressions$ben, ben), 23)
1217
})

0 commit comments

Comments
 (0)