Skip to content

Commit 5b61bd7

Browse files
authored
Merge pull request #319 from VikParuchuri/dev
Dev
2 parents d349f30 + ac1223f commit 5b61bd7

File tree

3 files changed

+7
-7
lines changed

3 files changed

+7
-7
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "surya-ocr"
3-
version = "0.11.0"
3+
version = "0.11.1"
44
description = "OCR, layout, reading order, and table recognition in 90+ languages"
55
authors = ["Vik Paruchuri <[email protected]>"]
66
readme = "README.md"

surya/detection/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,16 @@ def batch_generator(self, iterable, batch_size=None):
145145

146146
def __call__(self, images, text_boxes: List[List[List[float]]], batch_size=None, include_maps=False) -> List[TextDetectionResult]:
147147
detection_generator = self.batch_detection(images, batch_size=batch_size, static_cache=settings.DETECTOR_STATIC_CACHE)
148-
text_box_generator = self.batch_generator(text_boxes)
148+
text_box_generator = self.batch_generator(text_boxes, batch_size=batch_size)
149149

150150
postprocessing_futures = []
151151
max_workers = min(settings.DETECTOR_POSTPROCESSING_CPU_WORKERS, len(images))
152152
parallelize = not settings.IN_STREAMLIT and len(images) >= settings.DETECTOR_MIN_PARALLEL_THRESH
153153
executor = ThreadPoolExecutor if parallelize else FakeExecutor
154154
with executor(max_workers=max_workers) as e:
155155
for (preds, orig_sizes), batch_text_boxes in zip(detection_generator, text_box_generator):
156-
for pred, orig_size, text_boxes in zip(preds, orig_sizes, batch_text_boxes):
157-
postprocessing_futures.append(e.submit(parallel_get_inline_boxes, pred, orig_size, text_boxes, include_maps))
156+
for pred, orig_size, image_text_boxes in zip(preds, orig_sizes, batch_text_boxes):
157+
postprocessing_futures.append(e.submit(parallel_get_inline_boxes, pred, orig_size, image_text_boxes, include_maps))
158158

159+
assert len(postprocessing_futures) == len(images) == len(text_boxes) # Ensure we have a 1:1 mapping
159160
return [future.result() for future in postprocessing_futures]

surya/scripts/streamlit_app.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def text_detection(img) -> (Image.Image, TextDetectionResult):
6969
text_pred = predictors["detection"]([img])[0]
7070
text_polygons = [p.polygon for p in text_pred.bboxes]
7171
det_img = draw_polys_on_image(text_polygons, img.copy())
72-
return det_img, text_pred, inline_pred
72+
return det_img, text_pred
7373

7474

7575
def layout_detection(img) -> (Image.Image, LayoutResult):
@@ -211,11 +211,10 @@ def page_counter(pdf_file):
211211

212212
# Run Text Detection
213213
if run_text_det:
214-
det_img, text_pred, inline_pred = text_detection(pil_image)
214+
det_img, text_pred = text_detection(pil_image)
215215
with col1:
216216
st.image(det_img, caption="Detected Text", use_container_width=True)
217217
st.json(text_pred.model_dump(exclude=["heatmap", "affinity_map"]), expanded=True)
218-
st.json(inline_pred.model_dump(exclude=["heatmap", "affinity_map"]), expanded=True)
219218

220219
if run_inline_det:
221220
det_img, text_pred, inline_pred = inline_detection(pil_image)

0 commit comments

Comments
 (0)