Skip to content

Commit ce8452a

Browse files
authored
feat: add optional followlinks to datasamples spec (#426)
Signed-off-by: ThibaultFy <[email protected]>
1 parent a7a2631 commit ce8452a

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

substra/sdk/schemas.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import contextlib
22
import enum
33
import json
4+
import logging
45
import pathlib
56
import typing
67
import uuid
@@ -14,6 +15,8 @@
1415

1516
from substra.sdk import utils
1617

18+
logger = logging.getLogger(__name__)
19+
1720
_SERVER_NAMES = {
1821
"dataset": "data_manager",
1922
"summary_task": "task",
@@ -142,11 +145,14 @@ class DataSampleSpec(_Spec):
142145
"""Specification to create one or many data samples
143146
To create one data sample, use the 'path' field, otherwise use
144147
the 'paths' field.
148+
Use 'followlinks' to follow symbolic links recursively. Note that it will lead to infinite
149+
loops if a symbolic link points to a parent directory.
145150
"""
146151

147152
path: Optional[pathlib.Path] = None # Path to the data sample if only one
148153
paths: Optional[List[pathlib.Path]] = None # Path to the data samples if several
149154
data_manager_keys: typing.List[str]
155+
followlinks: Optional[bool] = False
150156

151157
type_: typing.ClassVar[Type] = Type.DataSample
152158

@@ -186,8 +192,13 @@ def build_request_kwargs(self, local):
186192
# redefine kwargs builder to handle the local paths
187193
# Serialize and deserialize to prevent errors eg with pathlib.Path
188194
data = json.loads(self.model_dump_json(exclude_unset=True))
195+
if self.followlinks:
196+
logger.warning(
197+
"The 'followlinks' option is enabled for your datasample registration. It may lead to infinite loops "
198+
"if a symbolic link points to a parent directory."
199+
)
189200
if local:
190-
with utils.extract_data_sample_files(data) as (data, files):
201+
with utils.extract_data_sample_files(data, followlinks=self.followlinks) as (data, files):
191202
yield (data, files)
192203
else:
193204
yield data, None

substra/sdk/utils.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,25 +44,25 @@ def extract_files(data, file_attributes):
4444
f.close()
4545

4646

47-
def zip_folder(fp, path):
47+
def zip_folder(fp, path, followlinks=False):
4848
zipf = zipfile.ZipFile(fp, "w", zipfile.ZIP_DEFLATED)
49-
for root, _, files in os.walk(path):
49+
for root, _, files in os.walk(path, followlinks=followlinks):
5050
for f in files:
5151
abspath = os.path.join(root, f)
5252
archive_path = os.path.relpath(abspath, start=path)
5353
zipf.write(abspath, arcname=archive_path)
5454
zipf.close()
5555

5656

57-
def zip_folder_in_memory(path):
57+
def zip_folder_in_memory(path, followlinks=False):
5858
fp = io.BytesIO()
59-
zip_folder(fp, path)
59+
zip_folder(fp, path, followlinks=followlinks)
6060
fp.seek(0)
6161
return fp
6262

6363

6464
@contextlib.contextmanager
65-
def extract_data_sample_files(data):
65+
def extract_data_sample_files(data, followlinks=False):
6666
# handle data sample specific case; paths and path cases
6767
data = copy.deepcopy(data)
6868

@@ -81,7 +81,7 @@ def extract_data_sample_files(data):
8181
for k, f in folders.items():
8282
if not os.path.isdir(f):
8383
raise exceptions.LoadDataException(f"Paths '{f}' is not an existing directory")
84-
files[k] = zip_folder_in_memory(f)
84+
files[k] = zip_folder_in_memory(f, followlinks=followlinks)
8585

8686
try:
8787
yield (data, files)

0 commit comments

Comments
 (0)