File tree 4 files changed +14
-7
lines changed
4 files changed +14
-7
lines changed Original file line number Diff line number Diff line change 1
1
2
2
EXTRA_DIST = README.md
3
3
EXTRA_DIST += counttestset.sh
4
- EXTRA_DIST += reorgdata.sh
5
4
EXTRA_DIST += runalltests.sh
5
+ EXTRA_DIST += runalltests_spa.sh
6
6
EXTRA_DIST += runtestset.sh
7
7
EXTRA_DIST += reports/1995.bus.3B.sum
8
8
EXTRA_DIST += reports/1995.doe3.3B.sum
9
9
EXTRA_DIST += reports/1995.mag.3B.sum
10
10
EXTRA_DIST += reports/1995.news.3B.sum
11
11
EXTRA_DIST += reports/2.03.summary
12
12
EXTRA_DIST += reports/2.04.summary
13
- EXTRA_DIST += reports/4_best_spa.summary
14
- EXTRA_DIST += reports/4_best_int_spa.summary
15
- EXTRA_DIST += reports/4_fast_spa.summary
Original file line number Diff line number Diff line change 45
45
srcdir=" $imdir "
46
46
fi
47
47
# echo "$srcdir/$page.tif"
48
- # Count character errors.
49
- iconv -f ISO8859-1 -t UTF-8 " $resdir /$page .unlv" > " $resdir /$page .text"
48
+ # Convert ground truth and recognized text to UTF-8 so that accented letters are handled correctly.
50
49
iconv -f ISO8859-1 -t UTF-8 " $srcdir /$page .txt" > " $srcdir /$page .text"
50
+ iconv -f ISO8859-1 -t UTF-8 " $resdir /$page .unlv" > " $resdir /$page .text"
51
+ # Count character errors.
51
52
ocrevalutf8 accuracy " $srcdir /$page .text" " $resdir /$page .text" > " $resdir /$page .acc"
52
53
accfiles=" $accfiles $resdir /$page .acc"
53
54
# Count word errors.
Original file line number Diff line number Diff line change
1
+ 1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
2
+ 1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
3
+ 1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
4
+ 1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
5
+ 4_fast_eng bus.3B 6124 98.11% 2.77% 1138 97.88% -30.23% 963 97.05 -25.52% 3935.26s
6
+ 4_fast_eng doe3.3B 30029 97.96% -17.39% 13781 94.45% 76.09% 13178 92.38 87.13% 18847.36s
7
+ 4_fast_eng mag.3B 10934 98.37% -27.32% 3343 97.15% -26.78% 2813 96.06 -16.75% 6867.14s
8
+ 4_fast_eng news.3B 5734 98.84% -10.85% 1322 98.45% -32.07% 1040 97.94 -30.76% 5527.38s
9
+ 4_fast_eng Total 52821 - -17.19% 19584 - 22.64% 17994 - 36.15%
Original file line number Diff line number Diff line change 77
77
# Run tesseract on all the pages.
78
78
$bindir /runtestset.sh " $imdir /$set /pages" " $tessdata " " eng"
79
79
# Count the errors on all the pages.
80
- $bindir /counttestset.sh " $imdir /$set /pages"
80
+ $bindir /counttestset.sh " $imdir /$set /pages" " eng "
81
81
# Get the old character, word, and nonstop word errors.
82
82
olderrs=$( cut -f3 " unlvtests/reports/1995.$set .sum" )
83
83
oldwerrs=$( cut -f6 " unlvtests/reports/1995.$set .sum" )
You can’t perform that action at this time.
0 commit comments