Skip to content

Commit 323361b

Browse files
committed
allow user specified box/tiff pairs with tesstrain.sh
1 parent 31c48a0 commit 323361b

File tree

2 files changed

+22
-2
lines changed

2 files changed

+22
-2
lines changed

src/training/tesstrain.sh

+18-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,15 @@ echo -e "USAGE: tesstrain.sh
2828
--run_shape_clustering # Run shape clustering (use for Indic langs).
2929
--maxpages # Specify maximum pages to output (default:0=all)
3030
--save_box_tiff # Save box/tiff pairs along with lstmf files.
31+
<<<<<<< HEAD
3132
--x_size # Specify width of output image (default:3600)
33+
=======
34+
--xsize # Specify width of output image (default:3600)
35+
36+
OPTIONAL flag for specifying directory with user specified box/tiff pairs.
37+
Files should be named similar to ${LANG_CODE}.${fontname}.exp${EXPOSURE}.box/tif
38+
--my_boxtiff_dir MY_BOXTIFF_DIR # Location of user specified box/tiff files.
39+
>>>>>>> c7cd112... allow box/tiff pairs for LSTM training
3240
3341
OPTIONAL flags for input data. If unspecified we will look for them in
3442
the langdata_dir directory.
@@ -60,6 +68,14 @@ ARGV=("$@")
6068
parse_flags
6169

6270
mkdir -p ${TRAINING_DIR}
71+
72+
if [[ ${MY_BOXTIFF_DIR} != "" ]]; then
73+
tlog "\n=== Copy existing box/tiff pairs from '${MY_BOXTIFF_DIR}'"
74+
# Best-effort copy of user-supplied .box files; a failing cp (e.g. no matching files) is deliberately ignored.
cp "${MY_BOXTIFF_DIR}"/*.box "${TRAINING_DIR}" || true
75+
# Best-effort copy of user-supplied .tif files; a failing cp (e.g. no matching files) is deliberately ignored.
cp "${MY_BOXTIFF_DIR}"/*.tif "${TRAINING_DIR}" || true
76+
ls -l ${TRAINING_DIR}
77+
fi
78+
6379
tlog "\n=== Starting training for language '${LANG_CODE}'"
6480

6581
source "$(dirname $0)/language-specific.sh"
@@ -72,8 +88,8 @@ phase_UP_generate_unicharset
7288
if ((LINEDATA)); then
7389
phase_E_extract_features " --psm 6 lstm.train " 8 "lstmf"
7490
make__lstmdata
75-
tlog "\nCreated starter traineddata for language '${LANG_CODE}'\n"
76-
tlog "\nRun lstmtraining to do the LSTM training for language '${LANG_CODE}'\n"
91+
tlog "\nCreated starter traineddata for LSTM training of language '${LANG_CODE}'\n"
92+
# Log the follow-up step: the user is expected to run lstmtraining next.
tlog "\nRun 'lstmtraining' command next to continue LSTM training for language '${LANG_CODE}'\n"
7793
else
7894
phase_D_generate_dawg
7995
phase_E_extract_features "box.train" 8 "tr"

src/training/tesstrain_utils.sh

+4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ fi
3636
X_SIZE=3600
3737
MAX_PAGES=0
3838
SAVE_BOX_TIFF=0
39+
MY_BOXTIFF_DIR=""
3940
OUTPUT_DIR="/tmp/tesstrain/tessdata"
4041
OVERWRITE=0
4142
LINEDATA=0
@@ -157,6 +158,9 @@ parse_flags() {
157158
--maxpages)
158159
parse_value "MAX_PAGES" ${ARGV[$j]:-}
159160
i=$j ;;
161+
--my_boxtiff_dir)
162+
parse_value "MY_BOXTIFF_DIR" ${ARGV[$j]:-}
163+
i=$j ;;
160164
--output_dir)
161165
parse_value "OUTPUT_DIR" ${ARGV[$j]:-}
162166
i=$j ;;

0 commit comments

Comments
 (0)