Skip to content

Commit 5024a47

Browse files
committed
Update udhr
1 parent 9dc7da3 commit 5024a47

File tree

3 files changed

+57
-50
lines changed

3 files changed

+57
-50
lines changed

package.json

+4 -1
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,16 @@
1111
"alpha-sort": "^5.0.0",
1212
"c8": "^7.0.0",
1313
"format": "^0.2.0",
14+
"hast-util-select": "^5.0.0",
15+
"hast-util-to-text": "^3.0.0",
1416
"human-format": "^0.11.0",
1517
"is-hidden": "^2.0.0",
1618
"iso-639-3": "^3.0.0",
1719
"lerna": "^4.0.0",
1820
"negate": "^1.0.0",
1921
"parse-author": "^2.0.0",
2022
"prettier": "^2.0.0",
23+
"rehype-parse": "^8.0.0",
2124
"remark-cli": "^10.0.0",
2225
"remark-gfm": "^2.0.0",
2326
"remark-lint-no-heading-punctuation": "^3.0.0",
@@ -28,7 +31,7 @@
2831
"speakers": "^2.0.0",
2932
"tape": "^5.0.0",
3033
"trigrams": "^4.0.0",
31-
"udhr": "^4.0.0",
34+
"udhr": "^5.0.0",
3235
"unicode-12.1.0": "^0.8.0",
3336
"unified": "^10.0.0",
3437
"unist-builder": "^3.0.0",

script/build.js

+43 -39
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,17 @@ import {isHidden} from 'is-hidden'
77
import {iso6393} from 'iso-639-3'
88
import {speakers} from 'speakers'
99
import {unified} from 'unified'
10+
import rehypeParse from 'rehype-parse'
1011
import gfm from 'remark-gfm'
1112
import stringify from 'remark-stringify'
1213
import {u} from 'unist-builder'
14+
import {selectAll} from 'hast-util-select'
15+
import {toText} from 'hast-util-to-text'
1316
import format from 'format'
1417
import author from 'parse-author'
1518
import human from 'human-format'
1619
import alphaSort from 'alpha-sort'
17-
import udhr from 'udhr'
20+
import {udhr} from 'udhr'
1821
import allTrigrams from 'trigrams'
1922
import unicode from 'unicode-12.1.0'
2023
import customFixtures from './custom-fixtures.js'
@@ -24,8 +27,6 @@ import exclude from './udhr-exclude.js'
2427
const ascending = alphaSort()
2528

2629
const trigrams = allTrigrams.min()
27-
const information = udhr.information()
28-
const declarations = udhr.json()
2930
const scripts = unicode.Script
3031

3132
const require = createRequire(import.meta.url)
@@ -175,19 +176,32 @@ function generate(basename) {
175176
fixtures = {}
176177

177178
support.forEach(function (language) {
178-
var udhrKey = language.udhr
179+
var udhrKey = language.udhr || language.iso6393
179180
var fixture
180181

181182
if (udhrKey in customFixtures) {
182183
fixture = customFixtures[udhrKey]
183-
} else if (udhrKey in declarations) {
184-
if (
185-
declarations[udhrKey].preamble &&
186-
declarations[udhrKey].preamble.para
187-
) {
188-
fixture = declarations[udhrKey].preamble.para
189-
} else if (declarations[udhrKey].note && declarations[udhrKey].note[0]) {
190-
fixture = declarations[udhrKey].note[0].para
184+
} else if (udhrKey) {
185+
const info = udhr.find((d) => d.code === udhrKey)
186+
187+
if (info) {
188+
const declaration = String(
189+
fs.readFileSync(
190+
path.join('node_modules', 'udhr', 'declaration', udhrKey + '.html')
191+
)
192+
)
193+
const tree = unified().use(rehypeParse).parse(declaration)
194+
195+
fixture =
196+
selectAll('header p', tree)
197+
.map((d) => toText(d))
198+
.join('\n') ||
199+
selectAll(
200+
'body > :matches(h1, h2, h3, h4, h5, h6), header :matches(h1, h2, h3, h4, h5, h6)',
201+
tree
202+
)
203+
.map((d) => toText(d))
204+
.join('\n')
191205
}
192206
}
193207

@@ -326,30 +340,22 @@ function count(list) {
326340
return map
327341
}
328342

329-
/* Get all values at `key` properties in `object`. */
330-
function all(object, key) {
331-
var results = []
332-
var property
333-
var value
334-
335-
for (property in object) {
336-
value = object[property]
343+
/* Get which scripts are used for a given UDHR code. */
344+
function scriptInformation(code) {
345+
const info = code ? udhr.find((d) => d.code === code) : undefined
346+
let paragraphs = ''
337347

338-
if (property === key) {
339-
results.push(value)
340-
} else if (typeof value === 'object') {
341-
results = results.concat(all(value, key))
342-
}
348+
if (info) {
349+
const declaration = fs.readFileSync(
350+
path.join('node_modules', 'udhr', 'declaration', code + '.html')
351+
)
352+
const tree = unified().use(rehypeParse).parse(declaration)
353+
paragraphs = selectAll('article p', tree)
354+
.map((d) => toText(d))
355+
.join('\n')
343356
}
344357

345-
return results
346-
}
347-
348-
/* Get which scripts are used for a given UDHR code. */
349-
function scriptInformation(code) {
350-
var declaration = declarations[code]
351-
var content = all(declaration, 'para').join('')
352-
var length = content.length
358+
var length = paragraphs.length
353359
var scriptInformation = {}
354360

355361
Object.keys(expressions).forEach(function (script) {
@@ -360,7 +366,7 @@ function scriptInformation(code) {
360366
return
361367
}
362368

363-
count = content.match(expressions[script])
369+
count = paragraphs.match(expressions[script])
364370
count = (count ? count.length : 0) / length
365371
count = Math.round(count * 100) / 100
366372

@@ -520,11 +526,9 @@ function getUDHRKeysfromISO(iso) {
520526
return overrides[iso]
521527
}
522528

523-
Object.keys(information).forEach(function (code) {
524-
var info = information[code]
525-
526-
if (info.ISO === iso || info.code === iso) {
527-
udhrs.push(code)
529+
udhr.forEach(function (info) {
530+
if (info.iso6393 === iso || info.code === iso) {
531+
udhrs.push(info.code)
528532
}
529533
})
530534

0 commit comments

Comments
 (0)