Skip to content

Commit bbc2aef

Browse files
triklozoid and makseq authored
fix: LEAP-1692: Image export for COCO and YOLO (#383)
Co-authored-by: Max Tkachenko <[email protected]>
1 parent 6be5737 commit bbc2aef

File tree

3 files changed

+78
-16
lines changed

3 files changed

+78
-16
lines changed

src/label_studio_sdk/_extensions/label_studio_tools/core/utils/io.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ def get_local_path(
124124
if is_uploaded_file and os.path.exists(image_dir):
125125
project_id = url.split("/")[-2] # To retrieve project_id
126126
filepath = os.path.join(image_dir, project_id, os.path.basename(url))
127-
if cache_dir and download_resources:
128-
shutil.copy(filepath, cache_dir)
129127
if os.path.exists(filepath):
128+
if cache_dir and download_resources:
129+
shutil.copy(filepath, cache_dir)
130130
logger.debug(f"Uploaded file: Path exists in image_dir: {filepath}")
131131
return filepath
132132

@@ -202,7 +202,7 @@ def download_and_cache(
202202
filepath = os.path.join(cache_dir, url_hash + "__" + url_filename)
203203

204204
if not os.path.exists(filepath):
205-
logger.info("Download {url} to {filepath}".format(url=url, filepath=filepath))
205+
logger.info("Download {url} to {filepath}. download_resources: {download_resources}".format(url=url, filepath=filepath, download_resources=download_resources))
206206
if download_resources:
207207
headers = {
208208
# avoid requests.exceptions.HTTPError: 403 Client Error: Forbidden. Please comply with the User-Agent policy:
@@ -227,6 +227,7 @@ def download_and_cache(
227227
raise e
228228
with io.open(filepath, mode="wb") as fout:
229229
fout.write(r.content)
230+
logger.info(f"File downloaded to {filepath}")
230231
return filepath
231232

232233

src/label_studio_sdk/converter/converter.py

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
convert_annotation_to_yolo,
3434
convert_annotation_to_yolo_obb,
3535
)
36+
from label_studio_sdk._extensions.label_studio_tools.core.utils.io import get_local_path
3637

3738
logger = logging.getLogger(__name__)
3839

@@ -55,6 +56,9 @@ class Format(Enum):
5556
YOLO = 11
5657
YOLO_OBB = 12
5758
CSV_OLD = 13
59+
YOLO_WITH_IMAGES = 14
60+
COCO_WITH_IMAGES = 15
61+
YOLO_OBB_WITH_IMAGES = 16
5862

5963
def __str__(self):
6064
return self.name
@@ -106,6 +110,12 @@ class Converter(object):
106110
"link": "https://labelstud.io/guide/export.html#COCO",
107111
"tags": ["image segmentation", "object detection"],
108112
},
113+
Format.COCO_WITH_IMAGES: {
114+
"title": "COCO with Images",
115+
"description": "COCO format with images downloaded.",
116+
"link": "https://labelstud.io/guide/export.html#COCO",
117+
"tags": ["image segmentation", "object detection"],
118+
},
109119
Format.VOC: {
110120
"title": "Pascal VOC XML",
111121
"description": "Popular XML format used for object detection and polygon image segmentation tasks.",
@@ -119,6 +129,12 @@ class Converter(object):
119129
"link": "https://labelstud.io/guide/export.html#YOLO",
120130
"tags": ["image segmentation", "object detection"],
121131
},
132+
Format.YOLO_WITH_IMAGES: {
133+
"title": "YOLO with Images",
134+
"description": "YOLO format with images downloaded.",
135+
"link": "https://labelstud.io/guide/export.html#YOLO",
136+
"tags": ["image segmentation", "object detection"],
137+
},
122138
Format.YOLO_OBB: {
123139
"title": "YOLOv8 OBB",
124140
"description": "Popular TXT format is created for each image file. Each txt file contains annotations for "
@@ -127,6 +143,12 @@ class Converter(object):
127143
"link": "https://labelstud.io/guide/export.html#YOLO",
128144
"tags": ["image segmentation", "object detection"],
129145
},
146+
Format.YOLO_OBB_WITH_IMAGES: {
147+
"title": "YOLOv8 OBB with Images",
148+
"description": "YOLOv8 OBB format with images downloaded.",
149+
"link": "https://labelstud.io/guide/export.html#YOLO",
150+
"tags": ["image segmentation", "object detection"],
151+
},
130152
Format.BRUSH_TO_NUMPY: {
131153
"title": "Brush labels to NumPy",
132154
"description": "Export your brush labels as NumPy 2d arrays. Each label outputs as one image.",
@@ -158,6 +180,8 @@ def __init__(
158180
output_tags=None,
159181
upload_dir=None,
160182
download_resources=True,
183+
access_token=None,
184+
hostname=None,
161185
):
162186
"""Initialize Label Studio Converter for Exports
163187
@@ -171,6 +195,8 @@ def __init__(
171195
self.upload_dir = upload_dir
172196
self.download_resources = download_resources
173197
self._schema = None
198+
self.access_token = access_token
199+
self.hostname = hostname
174200

175201
if isinstance(config, dict):
176202
self._schema = config
@@ -216,21 +242,23 @@ def convert(self, input_data, output_data, format, is_dir=True, **kwargs):
216242
)
217243
elif format == Format.CONLL2003:
218244
self.convert_to_conll2003(input_data, output_data, is_dir=is_dir)
219-
elif format == Format.COCO:
245+
elif format in [Format.COCO, Format.COCO_WITH_IMAGES]:
220246
image_dir = kwargs.get("image_dir")
247+
self.download_resources = format == Format.COCO_WITH_IMAGES
221248
self.convert_to_coco(
222249
input_data, output_data, output_image_dir=image_dir, is_dir=is_dir
223250
)
224-
elif format == Format.YOLO or format == Format.YOLO_OBB:
251+
elif format in [Format.YOLO, Format.YOLO_OBB, Format.YOLO_OBB_WITH_IMAGES, Format.YOLO_WITH_IMAGES]:
225252
image_dir = kwargs.get("image_dir")
226253
label_dir = kwargs.get("label_dir")
254+
self.download_resources = format in [Format.YOLO_WITH_IMAGES, Format.YOLO_OBB_WITH_IMAGES]
227255
self.convert_to_yolo(
228256
input_data,
229257
output_data,
230258
output_image_dir=image_dir,
231259
output_label_dir=label_dir,
232260
is_dir=is_dir,
233-
is_obb=(format == Format.YOLO_OBB),
261+
is_obb=(format in [Format.YOLO_OBB, Format.YOLO_OBB_WITH_IMAGES]),
234262
)
235263
elif format == Format.VOC:
236264
image_dir = kwargs.get("image_dir")
@@ -334,7 +362,9 @@ def _get_supported_formats(self):
334362
and "Labels" in output_tag_types
335363
):
336364
all_formats.remove(Format.COCO.name)
365+
all_formats.remove(Format.COCO_WITH_IMAGES.name)
337366
all_formats.remove(Format.YOLO.name)
367+
all_formats.remove(Format.YOLO_WITH_IMAGES.name)
338368
if not (
339369
"Image" in input_tag_types
340370
and (
@@ -353,6 +383,7 @@ def _get_supported_formats(self):
353383
all_formats.remove(Format.ASR_MANIFEST.name)
354384
if is_mig or ('Video' in input_tag_types and 'TimelineLabels' in output_tag_types):
355385
all_formats.remove(Format.YOLO_OBB.name)
386+
all_formats.remove(Format.YOLO_OBB_WITH_IMAGES.name)
356387

357388
return all_formats
358389

@@ -593,20 +624,25 @@ def add_image(images, width, height, image_id, image_path):
593624
)
594625
for item_idx, item in enumerate(item_iterator):
595626
image_path = item["input"][data_key]
627+
task_id = item["id"]
596628
image_id = len(images)
597629
width = None
598630
height = None
599631
# download all images of the dataset, including the ones without annotations
600632
if not os.path.exists(image_path):
601633
try:
602-
image_path = download(
603-
image_path,
604-
output_image_dir,
634+
image_path = get_local_path(
635+
url=image_path,
636+
hostname=self.hostname,
605637
project_dir=self.project_dir,
606-
return_relative_path=True,
607-
upload_dir=self.upload_dir,
638+
image_dir=self.upload_dir,
639+
cache_dir=output_image_dir,
608640
download_resources=self.download_resources,
641+
access_token=self.access_token,
642+
task_id=task_id,
609643
)
644+
# make path relative to output_dir
645+
image_path = os.path.relpath(image_path, output_dir)
610646
except:
611647
logger.info(
612648
"Unable to download {image_path}. The image of {item} will be skipped".format(
@@ -801,19 +837,24 @@ def convert_to_yolo(
801837
image_paths = [image_paths] if isinstance(image_paths, str) else image_paths
802838
# download image(s)
803839
image_path = None
840+
task_id = item["id"]
804841
# TODO: for multi-page annotation, this code won't produce correct relationships between page and annotated shapes
805842
# fixing the issue in RND-84
806843
for image_path in reversed(image_paths):
807844
if not os.path.exists(image_path):
808845
try:
809-
image_path = download(
810-
image_path,
811-
output_image_dir,
846+
image_path = get_local_path(
847+
url=image_path,
848+
hostname=self.hostname,
812849
project_dir=self.project_dir,
813-
return_relative_path=True,
814-
upload_dir=self.upload_dir,
850+
image_dir=self.upload_dir,
851+
cache_dir=output_image_dir,
815852
download_resources=self.download_resources,
853+
access_token=self.access_token,
854+
task_id=task_id,
816855
)
856+
# make path relative to output_dir
857+
image_path = os.path.relpath(image_path, output_dir)
817858
except:
818859
logger.info(
819860
"Unable to download {image_path}. The item {item} will be skipped".format(
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import pytest
2+
from unittest.mock import patch
3+
from label_studio_sdk.converter import Converter
4+
5+
@pytest.mark.parametrize(
    "format_name,expected_download_resources",
    [
        ("YOLO", False),
        ("YOLO_WITH_IMAGES", True),
        ("YOLO_OBB", False),
        ("YOLO_OBB_WITH_IMAGES", True),
    ],
)
def test_download_resources(format_name, expected_download_resources):
    """Check that only the ``*_WITH_IMAGES`` YOLO formats enable image download.

    ``Converter.convert`` overwrites ``download_resources`` according to the
    requested format before delegating to ``convert_to_yolo``. The constructor
    default is ``True``, so the plain ``YOLO`` / ``YOLO_OBB`` cases verify that
    ``convert`` actively switches it off, while the ``*_WITH_IMAGES`` cases
    verify that it stays on. All four formats routed through the YOLO branch
    are covered.
    """
    # convert_to_yolo is patched out so the dummy input/output paths are never
    # touched and nothing is downloaded; only the flag set by convert() matters.
    with patch.object(Converter, "convert_to_yolo", return_value=None) as mock_convert:
        converter = Converter(config={}, project_dir=".")
        converter.convert(
            input_data="dummy_input",
            output_data="dummy_output",
            format=format_name,
            is_dir=False,
        )
        assert converter.download_resources == expected_download_resources
        # every YOLO-family format must be dispatched to convert_to_yolo exactly once
        mock_convert.assert_called_once()

0 commit comments

Comments
 (0)