Skip to content

Commit ac03917

Browse files
authored
Merge pull request #270 from VikParuchuri/dev
Update layout model
2 parents 0774cef + 76754bc commit ac03917

File tree

4 files changed

+8
-5
lines changed

4 files changed

+8
-5
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "surya-ocr"
3-
version = "0.8.1"
3+
version = "0.8.2"
44
description = "OCR, layout, reading order, and table recognition in 90+ languages"
55
authors = ["Vik Paruchuri <[email protected]>"]
66
readme = "README.md"

surya/layout.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,10 @@ def batch_layout_detection(images: List, model, processor, batch_size=None, top_
181181
prediction["pause_tokens"] = last_prediction["pause_tokens"]
182182
prediction["token"].fill_(model.decoder.config.pause_token_id)
183183
batch_decoder_input[j, :] = model.decoder.config.pause_token_id
184-
elif intersects_other_boxes(prediction["polygon"], [p["polygon"] for p in batch_predictions[j]], thresh=.4):
184+
elif intersects_other_boxes(
185+
prediction["polygon"],
186+
[p["polygon"] for p in batch_predictions[j]], thresh=.4
187+
) and model.decoder.config.max_pause_tokens > 0:
185188
prediction["paused"] = True
186189
prediction["pause_tokens"] = 1
187190
prediction["token"].fill_(model.decoder.config.pause_token_id)

surya/model/layout/config.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -173,8 +173,8 @@ def __init__(
173173
aux_heads=0, # How many n-token-ahead heads to add
174174
causal=True,
175175
layer_norm_eps=1e-5,
176-
pause_token_count=5,
177-
max_pause_tokens=3,
176+
pause_token_count=0,
177+
max_pause_tokens=0,
178178
**kwargs,
179179
):
180180
self.num_hidden_layers = num_hidden_layers

surya/settings.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ def TORCH_DEVICE_MODEL(self) -> str:
6565
RECOGNITION_ENCODER_BATCH_DIVISOR: int = 1 # Divisor for batch size in decoder
6666

6767
# Layout
68-
LAYOUT_MODEL_CHECKPOINT: str = "datalab-to/surya_layout0"
68+
LAYOUT_MODEL_CHECKPOINT: str = "datalab-to/surya_layout"
6969
LAYOUT_IMAGE_SIZE: Dict = {"height": 768, "width": 768}
7070
LAYOUT_SLICE_MIN: Dict = {"height": 1500, "width": 1500} # When to start slicing images
7171
LAYOUT_SLICE_SIZE: Dict = {"height": 1200, "width": 1200} # Size of slices

0 commit comments

Comments (0)