Skip to content

Commit 92e2ad0

Browse files
committed
Fix CID 1164703 (Untrusted value as argument)
Wrong file data could give a large value for the number of vector elements, resulting in very large memory allocations. Limit the allowed number of elements to UINT16_MAX (65535), which should hopefully be sufficient for all use cases. Changing the data type of the related member variables from int to uint32_t allowed removing several type casts.

Signed-off-by: Stefan Weil <[email protected]>
1 parent a078ce0 commit 92e2ad0

File tree

7 files changed

+30
-30
lines changed

7 files changed

+30
-30
lines changed

src/ccutil/genericvector.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -564,12 +564,14 @@ class PointerVector : public GenericVector<T*> {
564564
// Also needs T::T(), as new T is used in this function.
565565
// Returns false in case of error.
566566
bool DeSerialize(bool swap, FILE* fp) {
567-
int32_t reserved;
567+
uint32_t reserved;
568568
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
569569
if (swap) Reverse32(&reserved);
570+
// Arbitrarily limit the number of elements to protect against bad data.
571+
if (reserved > UINT16_MAX) return false;
570572
GenericVector<T*>::reserve(reserved);
571573
truncate(0);
572-
for (int i = 0; i < reserved; ++i) {
574+
for (uint32_t i = 0; i < reserved; ++i) {
573575
int8_t non_null;
574576
if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
575577
T* item = nullptr;

src/classify/adaptmatch.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
13391339
ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
13401340
adapt_results->Initialize();
13411341
// Compute the bounding box of the features.
1342-
int num_features = sample.num_features();
1342+
uint32_t num_features = sample.num_features();
13431343
// Only the top and bottom of the blob_box are used by MasterMatcher, so
13441344
// fabricate right and left using top and bottom.
13451345
TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),

src/classify/mastertrainer.cpp

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
// Copyright 2010 Google Inc. All Rights Reserved.
2-
// Author: [email protected] (Ray Smith)
31
///////////////////////////////////////////////////////////////////////
42
// File: mastertrainer.cpp
53
// Description: Trainer to build the MasterClassifier.
@@ -552,8 +550,8 @@ CLUSTERER* MasterTrainer::SetupForClustering(
552550
int sample_id = 0;
553551
for (int i = sample_ptrs.size() - 1; i >= 0; --i) {
554552
const TrainingSample* sample = sample_ptrs[i];
555-
int num_features = sample->num_micro_features();
556-
for (int f = 0; f < num_features; ++f)
553+
uint32_t num_features = sample->num_micro_features();
554+
for (uint32_t f = 0; f < num_features; ++f)
557555
MakeSample(clusterer, sample->micro_features()[f], sample_id);
558556
++sample_id;
559557
}
@@ -706,7 +704,7 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font,
706704
if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) {
707705
const TrainingSample* sample = samples_.GetCanonicalSample(canonical_font,
708706
class_id2);
709-
for (int f = 0; f < sample->num_features(); ++f) {
707+
for (uint32_t f = 0; f < sample->num_features(); ++f) {
710708
RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED);
711709
}
712710
}

src/classify/picofeat.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,10 @@ FEATURE_SET Classify::ExtractIntCNFeatures(
224224
blob, false, &local_fx_info, &bl_features);
225225
if (sample == nullptr) return nullptr;
226226

227-
int num_features = sample->num_features();
227+
uint32_t num_features = sample->num_features();
228228
const INT_FEATURE_STRUCT* features = sample->features();
229229
FEATURE_SET feature_set = NewFeatureSet(num_features);
230-
for (int f = 0; f < num_features; ++f) {
230+
for (uint32_t f = 0; f < num_features; ++f) {
231231
FEATURE feature = NewFeature(&IntFeatDesc);
232232

233233
feature->Params[IntX] = features[f].X;

src/classify/shapeclassifier.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample,
109109
popup_menu->BuildMenu(debug_win, false);
110110
// Display the features in green.
111111
const INT_FEATURE_STRUCT* features = sample.features();
112-
int num_features = sample.num_features();
113-
for (int f = 0; f < num_features; ++f) {
112+
uint32_t num_features = sample.num_features();
113+
for (uint32_t f = 0; f < num_features; ++f) {
114114
RenderIntFeature(debug_win, &features[f], ScrollView::GREEN);
115115
}
116116
debug_win->Update();

src/classify/trainingsample.cpp

+14-14
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,10 @@ bool TrainingSample::Serialize(FILE* fp) const {
6161
return false;
6262
if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
6363
return false;
64-
if (static_cast<int>(fwrite(features_, sizeof(*features_), num_features_, fp))
65-
!= num_features_)
64+
if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
6665
return false;
67-
if (static_cast<int>(fwrite(micro_features_, sizeof(*micro_features_),
68-
num_micro_features_,
69-
fp)) != num_micro_features_)
66+
if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
67+
fp) != num_micro_features_)
7068
return false;
7169
if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
7270
kNumCNParams) return false;
@@ -102,16 +100,18 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
102100
ReverseN(&num_micro_features_, sizeof(num_micro_features_));
103101
ReverseN(&outline_length_, sizeof(outline_length_));
104102
}
103+
// Arbitrarily limit the number of elements to protect against bad data.
104+
if (num_features_ > UINT16_MAX) return false;
105+
if (num_micro_features_ > UINT16_MAX) return false;
105106
delete [] features_;
106107
features_ = new INT_FEATURE_STRUCT[num_features_];
107-
if (static_cast<int>(fread(features_, sizeof(*features_), num_features_, fp))
108+
if (fread(features_, sizeof(*features_), num_features_, fp)
108109
!= num_features_)
109110
return false;
110111
delete [] micro_features_;
111112
micro_features_ = new MicroFeature[num_micro_features_];
112-
if (static_cast<int>(fread(micro_features_, sizeof(*micro_features_),
113-
num_micro_features_,
114-
fp)) != num_micro_features_)
113+
if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
114+
fp) != num_micro_features_)
115115
return false;
116116
if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
117117
kNumCNParams) return false;
@@ -165,7 +165,7 @@ TrainingSample* TrainingSample::RandomizedCopy(int index) const {
165165
++index; // Remove the first combination.
166166
const int yshift = kYShiftValues[index / kSampleScaleSize];
167167
double scaling = kScaleValues[index % kSampleScaleSize];
168-
for (int i = 0; i < num_features_; ++i) {
168+
for (uint32_t i = 0; i < num_features_; ++i) {
169169
double result = (features_[i].X - kRandomizingCenter) * scaling;
170170
result += kRandomizingCenter;
171171
sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
@@ -217,7 +217,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
217217
} else {
218218
num_features_ = char_features->NumFeatures;
219219
features_ = new INT_FEATURE_STRUCT[num_features_];
220-
for (int f = 0; f < num_features_; ++f) {
220+
for (uint32_t f = 0; f < num_features_; ++f) {
221221
features_[f].X =
222222
static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
223223
features_[f].Y =
@@ -238,7 +238,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
238238
} else {
239239
num_micro_features_ = char_features->NumFeatures;
240240
micro_features_ = new MicroFeature[num_micro_features_];
241-
for (int f = 0; f < num_micro_features_; ++f) {
241+
for (uint32_t f = 0; f < num_micro_features_; ++f) {
242242
for (int d = 0; d < MFCount; ++d) {
243243
micro_features_[f][d] = char_features->Features[f]->Params[d];
244244
}
@@ -294,7 +294,7 @@ void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
294294
// Returns a pix representing the sample. (Int features only.)
295295
Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
296296
Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
297-
for (int f = 0; f < num_features_; ++f) {
297+
for (uint32_t f = 0; f < num_features_; ++f) {
298298
int start_x = features_[f].X;
299299
int start_y = kIntFeatureExtent - features_[f].Y;
300300
double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
@@ -315,7 +315,7 @@ Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
315315
void TrainingSample::DisplayFeatures(ScrollView::Color color,
316316
ScrollView* window) const {
317317
#ifndef GRAPHICS_DISABLED
318-
for (int f = 0; f < num_features_; ++f) {
318+
for (uint32_t f = 0; f < num_features_; ++f) {
319319
RenderIntFeature(window, &features_[f], color);
320320
}
321321
#endif // GRAPHICS_DISABLED

src/classify/trainingsample.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,13 @@ class TrainingSample : public ELIST_LINK {
137137
void set_bounding_box(const TBOX& box) {
138138
bounding_box_ = box;
139139
}
140-
int num_features() const {
140+
uint32_t num_features() const {
141141
return num_features_;
142142
}
143143
const INT_FEATURE_STRUCT* features() const {
144144
return features_;
145145
}
146-
int num_micro_features() const {
146+
uint32_t num_micro_features() const {
147147
return num_micro_features_;
148148
}
149149
const MicroFeature* micro_features() const {
@@ -206,9 +206,9 @@ class TrainingSample : public ELIST_LINK {
206206
// Bounding box of sample in original image.
207207
TBOX bounding_box_;
208208
// Number of INT_FEATURE_STRUCT in features_ array.
209-
int num_features_;
209+
uint32_t num_features_;
210210
// Number of MicroFeature in micro_features_ array.
211-
int num_micro_features_;
211+
uint32_t num_micro_features_;
212212
// Total length of outline in the baseline normalized coordinate space.
213213
// See comment in WERD_RES class definition for a discussion of coordinate
214214
// spaces.

0 commit comments

Comments
 (0)