Skip to content

Commit 43e3f24

Browse files
committed
Add the --save_box_tiff flag to save box/tiff pairs along with lstmf files.
1 parent b34cf9d commit 43e3f24

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

src/training/tesstrain.sh

+1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
# --lang LANG_CODE # ISO 639 code.
2323
# --langdata_dir DATADIR # Path to tesseract/training/langdata directory.
2424
# --output_dir OUTPUTDIR # Location of output traineddata file.
25+
# --save_box_tiff # Save box/tiff pairs along with lstmf files.
2526
# --overwrite # Safe to overwrite files in output_dir.
2627
# --linedata_only # Only generate training data for lstmtraining.
2728
# --run_shape_clustering # Run shape clustering (use for Indic langs).

src/training/tesstrain_utils.sh

+11-1
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ else
2424
FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
2525
fi
2626
MAX_PAGES=0
27+
SAVE_BOX_TIFF=0
2728
OUTPUT_DIR="/tmp/tesstrain/tessdata"
2829
OVERWRITE=0
2930
LINEDATA=0
@@ -139,6 +140,8 @@ parse_flags() {
139140
i=$j ;;
140141
--overwrite)
141142
OVERWRITE=1 ;;
143+
--save_box_tiff)
144+
SAVE_BOX_TIFF=1 ;;
142145
--linedata_only)
143146
LINEDATA=1 ;;
144147
--extract_font_properties)
@@ -182,7 +185,9 @@ parse_flags() {
182185
fi
183186

184187
# Location where intermediate files will be created.
185-
TRAINING_DIR=${WORKSPACE_DIR}/${LANG_CODE}
188+
TIMESTAMP=`date +%Y-%m-%d`
189+
TMP_DIR=$(mktemp -d --tmpdir ${LANG_CODE}-${TIMESTAMP}.XXX )
190+
TRAINING_DIR=${TMP_DIR}
186191
# Location of log file for the whole run.
187192
LOG_FILE=${TRAINING_DIR}/tesstrain.log
188193

@@ -530,6 +535,9 @@ make__lstmdata() {
530535
--puncs "${lang_prefix}.punc" \
531536
--output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \
532537
"${pass_through}" "${lang_is_rtl}"
538+
539+
if ((SAVE_BOX_TIFF)); then
540+
tlog "\n=== Saving box/tiff pairs for training data ==="
533541
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
534542
tlog "Moving ${f} to ${OUTPUT_DIR}"
535543
mv "${f}" "${OUTPUT_DIR}"
@@ -538,6 +546,8 @@ make__lstmdata() {
538546
tlog "Moving ${f} to ${OUTPUT_DIR}"
539547
mv "${f}" "${OUTPUT_DIR}"
540548
done
549+
fi
550+
tlog "\n=== Moving lstmf files for training data ==="
541551
for f in "${TRAINING_DIR}/${LANG_CODE}".*.lstmf; do
542552
tlog "Moving ${f} to ${OUTPUT_DIR}"
543553
mv "${f}" "${OUTPUT_DIR}"

0 commit comments

Comments
 (0)