Skip to content

Commit 4753d8c

Browse files
committed
correct script for eng, remove new reports from distribution
1 parent a4241c9 commit 4753d8c

File tree

4 files changed

+14
-7
lines changed

4 files changed

+14
-7
lines changed

unlvtests/Makefile.am

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11

22
EXTRA_DIST = README.md
33
EXTRA_DIST += counttestset.sh
4-
EXTRA_DIST += reorgdata.sh
54
EXTRA_DIST += runalltests.sh
5+
EXTRA_DIST += runalltests_spa.sh
66
EXTRA_DIST += runtestset.sh
77
EXTRA_DIST += reports/1995.bus.3B.sum
88
EXTRA_DIST += reports/1995.doe3.3B.sum
99
EXTRA_DIST += reports/1995.mag.3B.sum
1010
EXTRA_DIST += reports/1995.news.3B.sum
1111
EXTRA_DIST += reports/2.03.summary
1212
EXTRA_DIST += reports/2.04.summary
13-
EXTRA_DIST += reports/4_best_spa.summary
14-
EXTRA_DIST += reports/4_best_int_spa.summary
15-
EXTRA_DIST += reports/4_fast_spa.summary

unlvtests/counttestset.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,10 @@ do
4545
srcdir="$imdir"
4646
fi
4747
#echo "$srcdir/$page.tif"
48-
# Count character errors.
49-
iconv -f ISO8859-1 -t UTF-8 "$resdir/$page.unlv" >"$resdir/$page.text"
48+
# Convert groundtruth and recognized text to UTF-8 to correctly treat accented letters.
5049
iconv -f ISO8859-1 -t UTF-8 "$srcdir/$page.txt" >"$srcdir/$page.text"
50+
iconv -f ISO8859-1 -t UTF-8 "$resdir/$page.unlv" >"$resdir/$page.text"
51+
# Count character errors.
5152
ocrevalutf8 accuracy "$srcdir/$page.text" "$resdir/$page.text" > "$resdir/$page.acc"
5253
accfiles="$accfiles $resdir/$page.acc"
5354
# Count word errors.

unlvtests/reports/4_fast_eng.summary

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
2+
1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
3+
1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
4+
1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
5+
4_fast_eng bus.3B 6124 98.11% 2.77% 1138 97.88% -30.23% 963 97.05 -25.52% 3935.26s
6+
4_fast_eng doe3.3B 30029 97.96% -17.39% 13781 94.45% 76.09% 13178 92.38 87.13% 18847.36s
7+
4_fast_eng mag.3B 10934 98.37% -27.32% 3343 97.15% -26.78% 2813 96.06 -16.75% 6867.14s
8+
4_fast_eng news.3B 5734 98.84% -10.85% 1322 98.45% -32.07% 1040 97.94 -30.76% 5527.38s
9+
4_fast_eng Total 52821 - -17.19% 19584 - 22.64% 17994 - 36.15%

unlvtests/runalltests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ do
7777
# Run tesseract on all the pages.
7878
$bindir/runtestset.sh "$imdir/$set/pages" "$tessdata" "eng"
7979
# Count the errors on all the pages.
80-
$bindir/counttestset.sh "$imdir/$set/pages"
80+
$bindir/counttestset.sh "$imdir/$set/pages" "eng"
8181
# Get the old character word and nonstop word errors.
8282
olderrs=$(cut -f3 "unlvtests/reports/1995.$set.sum")
8383
oldwerrs=$(cut -f6 "unlvtests/reports/1995.$set.sum")

0 commit comments

Comments
 (0)