Skip to content

Commit 318b88d

Browse files
committed
ccmain: Fix typos in comments and strings
Most of them were found by codespell.

Signed-off-by: Stefan Weil <[email protected]>
1 parent 11b2a4d commit 318b88d

8 files changed

+37
-37
lines changed

ccmain/control.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1556,7 +1556,7 @@ void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,
15561556
word->fix_quotes();
15571557
if (tessedit_fix_hyphens)
15581558
word->fix_hyphens();
1559-
/* Dont trust fix_quotes! - though I think I've fixed the bug */
1559+
/* Don't trust fix_quotes! - though I think I've fixed the bug */
15601560
if (word->best_choice->length() != word->box_word->length()) {
15611561
tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;"
15621562
" #Blobs=%d\n",
@@ -1694,7 +1694,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(
16941694
goto not_a_word;
16951695
/*
16961696
Allow a single hyphen in a lower case word
1697-
- dont trust upper case - I've seen several cases of "H" -> "I-I"
1697+
- don't trust upper case - I've seen several cases of "H" -> "I-I"
16981698
*/
16991699
if (lengths[i] == 1 && s[offset] == '-') {
17001700
hyphen_pos = i;

ccmain/docqual.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) {
129129
int expected_outline_count;
130130

131131
if (STRING (outlines_odd).contains (c))
132-
return 0; //Dont use this char
132+
return 0; //Don't use this char
133133
else if (STRING (outlines_2).contains (c))
134134
expected_outline_count = 2;
135135
else
@@ -157,7 +157,7 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
157157
* - Word segmentation is the same as the original image
158158
* - All characters have the expected number of outlines
159159
* NOTE - the rejection counts are recalculated after unrejection
160-
* - CANT do it in a single pass without a bit of fiddling
160+
* - CAN'T do it in a single pass without a bit of fiddling
161161
* - keep it simple but inefficient
162162
*************************************************************************/
163163
void Tesseract::unrej_good_quality_words( //unreject potential
@@ -403,7 +403,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
403403

404404
/*************************************************************************
405405
* reject_whole_page()
406-
* Dont believe any of it - set the reject map to 00..00 in all words
406+
* Don't believe any of it - set the reject map to 00..00 in all words
407407
*
408408
*************************************************************************/
409409

ccmain/fixspace.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
5555
WERD_RES *word_res;
5656
WERD_RES_LIST fuzzy_space_words;
5757
inT16 new_length;
58-
BOOL8 prevent_null_wd_fixsp; // DONT process blobless wds
58+
BOOL8 prevent_null_wd_fixsp; // DON'T process blobless wds
5959
inT32 word_index; // current word
6060

6161
block_res_it.set_to_list(&page_res->block_res_list);
@@ -222,7 +222,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
222222
* fuzzy spaces. The problem with the basic measure is that "561 63" would score
223223
* the same as "56163", though given our knowledge that the space is fuzzy, and
224224
* that there is a "1" next to the fuzzy space, we need to ensure that "56163"
225-
* is prefered.
225+
* is preferred.
226226
*
227227
* The solution is to NOT COUNT the score of any word which has a digit at one
228228
* end and a "1Il" as the character the other side of the space.
@@ -272,8 +272,8 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
272272
} else {
273273
/*
274274
Can we add the prev word score and potentially count this word?
275-
Yes IF it didnt end in a 1 when the first char of this word is a digit
276-
AND it didnt end in a digit when the first char of this word is a 1
275+
Yes IF it didn't end in a 1 when the first char of this word is a digit
276+
AND it didn't end in a digit when the first char of this word is a 1
277277
*/
278278
word_len = word->reject_map.length();
279279
current_word_ok_so_far = FALSE;
@@ -507,7 +507,7 @@ BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
507507

508508
/*
509509
Use all the standard pass 2 conditions for mode 5 in set_done() in
510-
reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT
510+
reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T
511511
CARE WHETHER WE HAVE of/at on/an etc.
512512
*/
513513
if (fixsp_done_mode > 0 &&

ccmain/output.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
297297
/*************************************************************************
298298
* SUSPECT LEVELS
299299
*
300-
* 0 - dont reject ANYTHING
300+
* 0 - don't reject ANYTHING
301301
* 1,2 - partial rejection
302302
* 3 - BEST
303303
*
@@ -337,7 +337,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
337337
rating_per_ch = word.rating() / word_res->reject_map.length();
338338

339339
if (rating_per_ch >= suspect_rating_per_ch)
340-
return; //Dont touch bad ratings
340+
return; //Don't touch bad ratings
341341

342342
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
343343
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/

ccmain/paramsd.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -329,13 +329,13 @@ void ParamsEditor::WriteParams(char *filename,
329329
fclose(fp);
330330
sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
331331
int a = sv_window_->ShowYesNoDialog(msg_str);
332-
if (a == 'n') { return; } // dont write
332+
if (a == 'n') { return; } // don't write
333333
}
334334

335335

336336
fp = fopen (filename, "wb"); // can we write to it?
337337
if (fp == NULL) {
338-
sv_window_->AddMessage("Cant write to file " "%s" "", filename);
338+
sv_window_->AddMessage("Can't write to file " "%s" "", filename);
339339
return;
340340
}
341341

ccmain/reject.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ BOOL8 Tesseract::word_contains_non_1_digit(const char *word,
521521

522522
/*************************************************************************
523523
* dont_allow_1Il()
524-
* Dont unreject LONE accepted 1Il conflict set chars
524+
* Don't unreject LONE accepted 1Il conflict set chars
525525
*************************************************************************/
526526
void Tesseract::dont_allow_1Il(WERD_RES *word) {
527527
int i = 0;
@@ -633,7 +633,7 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) {
633633
next_left = 9999;
634634
else
635635
next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left();
636-
// Dont touch small or touching blobs - it is too dangerous.
636+
// Don't touch small or touching blobs - it is too dangerous.
637637
if ((out_box.width() > 8 * word_res->denorm.x_scale()) &&
638638
(out_box.left() > prev_right) && (out_box.right() < next_left)) {
639639
aspect_ratio = out_box.width() / (float) out_box.height();

ccmain/tesseractclass.cpp

+10-10
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ Tesseract::Tesseract()
136136
BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true,
137137
"Try to improve fuzzy spaces", this->params()),
138138
BOOL_MEMBER(tessedit_unrej_any_wd, false,
139-
"Dont bother with word plausibility", this->params()),
139+
"Don't bother with word plausibility", this->params()),
140140
BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?",
141141
this->params()),
142142
BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height",
@@ -310,19 +310,19 @@ Tesseract::Tesseract()
310310
this->params()),
311311
INT_MEMBER(crunch_pot_indicators, 1,
312312
"How many potential indicators needed", this->params()),
313-
BOOL_MEMBER(crunch_leave_ok_strings, true, "Dont touch sensible strings",
313+
BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings",
314314
this->params()),
315315
BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring",
316316
this->params()),
317317
BOOL_MEMBER(crunch_leave_accept_strings, false,
318-
"Dont pot crunch sensible strings", this->params()),
318+
"Don't pot crunch sensible strings", this->params()),
319319
BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures",
320320
this->params()),
321321
INT_MEMBER(crunch_leave_lc_strings, 4,
322-
"Dont crunch words with long lower case strings",
322+
"Don't crunch words with long lower case strings",
323323
this->params()),
324324
INT_MEMBER(crunch_leave_uc_strings, 4,
325-
"Dont crunch words with long lower case strings",
325+
"Don't crunch words with long lower case strings",
326326
this->params()),
327327
INT_MEMBER(crunch_long_repetitions, 3,
328328
"Crunch words with long repetitions", this->params()),
@@ -393,21 +393,21 @@ Tesseract::Tesseract()
393393
INT_MEMBER(suspect_space_level, 100,
394394
"Min suspect level for rejecting spaces", this->params()),
395395
INT_MEMBER(suspect_short_words, 2,
396-
"Dont Suspect dict wds longer than this", this->params()),
396+
"Don't suspect dict wds longer than this", this->params()),
397397
BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
398398
this->params()),
399-
double_MEMBER(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit",
399+
double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit",
400400
this->params()),
401401
double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
402402
this->params()),
403403
BOOL_MEMBER(tessedit_minimal_rejection, false,
404404
"Only reject tess failures", this->params()),
405-
BOOL_MEMBER(tessedit_zero_rejection, false, "Dont reject ANYTHING",
405+
BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING",
406406
this->params()),
407407
BOOL_MEMBER(tessedit_word_for_word, false,
408408
"Make output have exactly one word per WERD", this->params()),
409409
BOOL_MEMBER(tessedit_zero_kelvin_rejection, false,
410-
"Dont reject ANYTHING AT ALL", this->params()),
410+
"Don't reject ANYTHING AT ALL", this->params()),
411411
BOOL_MEMBER(tessedit_consistent_reps, true,
412412
"Force all rep chars the same", this->params()),
413413
INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm",
@@ -424,7 +424,7 @@ Tesseract::Tesseract()
424424
"Use DOC dawg in 11l conf. detector", this->params()),
425425
BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test",
426426
this->params()),
427-
BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Dont double check",
427+
BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check",
428428
this->params()),
429429
BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control",
430430
this->params()),

ccmain/tesseractclass.h

+11-11
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ class Tesseract : public Wordrec {
733733
GenericVector<UNICHAR_ID>* class_ids);
734734
// Resegments the word to achieve the target_text from the classifier.
735735
// Returns false if the re-segmentation fails.
736-
// Uses brute-force combination of upto kMaxGroupSize adjacent blobs, and
736+
// Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and
737737
// applies a full search on the classifier results to find the best classified
738738
// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
739739
// substitutions ARE used.
@@ -833,7 +833,7 @@ class Tesseract : public Wordrec {
833833
BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,
834834
"Try to improve fuzzy spaces");
835835
BOOL_VAR_H(tessedit_unrej_any_wd, false,
836-
"Dont bother with word plausibility");
836+
"Don't bother with word plausibility");
837837
BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?");
838838
BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height");
839839
BOOL_VAR_H(tessedit_enable_doc_dict, true,
@@ -954,15 +954,15 @@ class Tesseract : public Wordrec {
954954
double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this");
955955
INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch");
956956
INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed");
957-
BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible strings");
957+
BOOL_VAR_H(crunch_leave_ok_strings, true, "Don't touch sensible strings");
958958
BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring");
959959
BOOL_VAR_H(crunch_leave_accept_strings, false,
960-
"Dont pot crunch sensible strings");
960+
"Don't pot crunch sensible strings");
961961
BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures");
962962
INT_VAR_H(crunch_leave_lc_strings, 4,
963-
"Dont crunch words with long lower case strings");
963+
"Don't crunch words with long lower case strings");
964964
INT_VAR_H(crunch_leave_uc_strings, 4,
965-
"Dont crunch words with long lower case strings");
965+
"Don't crunch words with long lower case strings");
966966
INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions");
967967
INT_VAR_H(crunch_debug, 0, "As it says");
968968
INT_VAR_H(fixsp_non_noise_limit, 1,
@@ -1010,16 +1010,16 @@ class Tesseract : public Wordrec {
10101010
INT_VAR_H(suspect_space_level, 100,
10111011
"Min suspect level for rejecting spaces");
10121012
INT_VAR_H(suspect_short_words, 2,
1013-
"Dont Suspect dict wds longer than this");
1013+
"Don't Suspect dict wds longer than this");
10141014
BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
1015-
double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit");
1015+
double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit");
10161016
double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
10171017
BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures");
1018-
BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING");
1018+
BOOL_VAR_H(tessedit_zero_rejection, false, "Don't reject ANYTHING");
10191019
BOOL_VAR_H(tessedit_word_for_word, false,
10201020
"Make output have exactly one word per WERD");
10211021
BOOL_VAR_H(tessedit_zero_kelvin_rejection, false,
1022-
"Dont reject ANYTHING AT ALL");
1022+
"Don't reject ANYTHING AT ALL");
10231023
BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same");
10241024
INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm");
10251025
BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug");
@@ -1030,7 +1030,7 @@ class Tesseract : public Wordrec {
10301030
"Aspect ratio dot/hyphen test");
10311031
BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector");
10321032
BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test");
1033-
BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check");
1033+
BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check");
10341034
BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control");
10351035
BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control");
10361036
BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control");

0 commit comments

Comments
 (0)