Merge pull request #319 from VikParuchuri/dev

VikParuchuri · web-flow · commit 5b61bd77e14e · 2025-02-12T17:31:17.000-08:00
Dev
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "surya-ocr"
-version = "0.11.0"
+version = "0.11.1"
 description = "OCR, layout, reading order, and table recognition in 90+ languages"
 authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"]
 readme = "README.md"
diff --git a/surya/detection/__init__.py b/surya/detection/__init__.py
@@ -145,15 +145,16 @@ def batch_generator(self, iterable, batch_size=None):
 
     def __call__(self, images, text_boxes: List[List[List[float]]], batch_size=None, include_maps=False) -> List[TextDetectionResult]:
         detection_generator = self.batch_detection(images, batch_size=batch_size, static_cache=settings.DETECTOR_STATIC_CACHE)
-        text_box_generator = self.batch_generator(text_boxes)
+        text_box_generator = self.batch_generator(text_boxes, batch_size=batch_size)
 
         postprocessing_futures = []
         max_workers = min(settings.DETECTOR_POSTPROCESSING_CPU_WORKERS, len(images))
         parallelize = not settings.IN_STREAMLIT and len(images) >= settings.DETECTOR_MIN_PARALLEL_THRESH
         executor = ThreadPoolExecutor if parallelize else FakeExecutor
         with executor(max_workers=max_workers) as e:
             for (preds, orig_sizes), batch_text_boxes in zip(detection_generator, text_box_generator):
-                for pred, orig_size, text_boxes in zip(preds, orig_sizes, batch_text_boxes):
-                    postprocessing_futures.append(e.submit(parallel_get_inline_boxes, pred, orig_size, text_boxes, include_maps))
+                for pred, orig_size, image_text_boxes in zip(preds, orig_sizes, batch_text_boxes):
+                    postprocessing_futures.append(e.submit(parallel_get_inline_boxes, pred, orig_size, image_text_boxes, include_maps))
 
+        assert len(postprocessing_futures) == len(images) == len(text_boxes) # Ensure we have a 1:1 mapping
         return [future.result() for future in postprocessing_futures]
diff --git a/surya/scripts/streamlit_app.py b/surya/scripts/streamlit_app.py
@@ -69,7 +69,7 @@ def text_detection(img) -> (Image.Image, TextDetectionResult):
     text_pred = predictors["detection"]([img])[0]
     text_polygons = [p.polygon for p in text_pred.bboxes]
     det_img = draw_polys_on_image(text_polygons, img.copy())
-    return det_img, text_pred, inline_pred
+    return det_img, text_pred
 
 
 def layout_detection(img) -> (Image.Image, LayoutResult):
@@ -211,11 +211,10 @@ def page_counter(pdf_file):
 
 # Run Text Detection
 if run_text_det:
-    det_img, text_pred, inline_pred = text_detection(pil_image)
+    det_img, text_pred = text_detection(pil_image)
     with col1:
         st.image(det_img, caption="Detected Text", use_container_width=True)
         st.json(text_pred.model_dump(exclude=["heatmap", "affinity_map"]), expanded=True)
-        st.json(inline_pred.model_dump(exclude=["heatmap", "affinity_map"]), expanded=True)
 
 if run_inline_det:
     det_img, text_pred, inline_pred = inline_detection(pil_image)