Skip to content

Commit b68afd0

Browse files
authored
Merge pull request #341 from VikParuchuri/dev
2 parents 7e5ac9d + a32b55e commit b68afd0

File tree

3 files changed, with 9 additions and 12 deletions.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "surya-ocr"
-version = "0.13.0"
+version = "0.13.1"
 description = "OCR, layout, reading order, and table recognition in 90+ languages"
 authors = ["Vik Paruchuri <[email protected]>"]
 readme = "README.md"

surya/detection/__init__.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,26 +135,22 @@ def batch_detection(
 
 class InlineDetectionPredictor(DetectionPredictor):
     model_loader_cls = InlineDetectionModelLoader
-
-    def batch_generator(self, iterable, batch_size=None):
-        if batch_size is None:
-            batch_size = self.get_batch_size()
-
-        for i in range(0, len(iterable), batch_size):
-            yield iterable[i:i+batch_size]
 
     def __call__(self, images, text_boxes: List[List[List[float]]], batch_size=None, include_maps=False) -> List[TextDetectionResult]:
         detection_generator = self.batch_detection(images, batch_size=batch_size, static_cache=settings.DETECTOR_STATIC_CACHE)
-        text_box_generator = self.batch_generator(text_boxes, batch_size=batch_size)
 
         postprocessing_futures = []
         max_workers = min(settings.DETECTOR_POSTPROCESSING_CPU_WORKERS, len(images))
         parallelize = not settings.IN_STREAMLIT and len(images) >= settings.DETECTOR_MIN_PARALLEL_THRESH
         executor = ThreadPoolExecutor if parallelize else FakeExecutor
+
+        image_idx = 0
         with executor(max_workers=max_workers) as e:
-            for (preds, orig_sizes), batch_text_boxes in zip(detection_generator, text_box_generator):
-                for pred, orig_size, image_text_boxes in zip(preds, orig_sizes, batch_text_boxes):
-                    postprocessing_futures.append(e.submit(parallel_get_inline_boxes, pred, orig_size, image_text_boxes, include_maps))
+            for (preds, orig_sizes) in detection_generator:
+                for pred, orig_size in zip(preds, orig_sizes):
+                    postprocessing_futures.append(e.submit(parallel_get_inline_boxes, pred, orig_size, text_boxes[image_idx], include_maps))
+                    image_idx += 1
 
         assert len(postprocessing_futures) == len(images) == len(text_boxes) # Ensure we have a 1:1 mapping
+
         return [future.result() for future in postprocessing_futures]

surya/detection/heatmap.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ def parallel_get_boxes(preds, orig_sizes, include_maps=False):
156156
# Skip for vertical boxes
157157
if box.height < 3 * box.width:
158158
box.expand(x_margin=0, y_margin=settings.DETECTOR_BOX_Y_EXPAND_MARGIN)
159+
box.fit_to_bounds([0, 0, orig_sizes[0], orig_sizes[1]]) # Fix any bad expands
159160

160161
result = TextDetectionResult(
161162
bboxes=bboxes,

0 commit comments

Comments
 (0)