Skip to content

Commit 2672ea2

Browse files
freddyaboulton, gradio-pr-bot, abidlabs
authored
Postprocess hardening (#9122)
* hardening * Fix code * add changeset * Fix tests * add test fuzzer * Clean up * revert * Fix * Add code --------- Co-authored-by: gradio-pr-bot <[email protected]> Co-authored-by: Abubakar Abid <[email protected]>
1 parent 5cedf16 commit 2672ea2

File tree

9 files changed

+183
-53
lines changed

9 files changed

+183
-53
lines changed

.changeset/deep-ways-wink.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"gradio": minor
3+
---
4+
5+
feat:Postprocess hardening

gradio/blocks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1772,7 +1772,6 @@ async def postprocess_data(
17721772
kwargs["render"] = False
17731773

17741774
state[block._id] = block.__class__(**kwargs)
1775-
17761775
prediction_value = postprocess_update_dict(
17771776
block=state[block._id],
17781777
update_dict=prediction_value,
@@ -1909,6 +1908,7 @@ async def process_api(
19091908
batch = block_fn.batch
19101909
state_ids_to_track, hashed_values = self.get_state_ids_to_track(block_fn, state)
19111910
changed_state_ids = []
1911+
LocalContext.blocks.set(self)
19121912

19131913
if batch:
19141914
max_batch_size = block_fn.max_batch_size

gradio/processing_utils.py

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323
from PIL import Image, ImageOps, ImageSequence, PngImagePlugin
2424

2525
from gradio import utils, wasm_utils
26+
from gradio.context import LocalContext
2627
from gradio.data_classes import FileData, GradioModel, GradioRootModel, JsonData
27-
from gradio.exceptions import Error
28+
from gradio.exceptions import Error, InvalidPathError
2829
from gradio.utils import abspath, get_hash_seed, get_upload_folder, is_in_or_equal
2930

3031
with warnings.catch_warnings():
@@ -426,14 +427,8 @@ def _move_to_cache(d: dict):
426427
pass
427428
elif not block.proxy_url:
428429
# If the file is on a remote server, do not move it to cache.
429-
if check_in_upload_folder and not client_utils.is_http_url_like(
430-
payload.path
431-
):
432-
path = os.path.abspath(payload.path)
433-
if not is_in_or_equal(path, get_upload_folder()):
434-
raise ValueError(
435-
f"File {path} is not in the upload folder and cannot be accessed."
436-
)
430+
if not client_utils.is_http_url_like(payload.path):
431+
_check_allowed(payload.path, check_in_upload_folder)
437432
if not payload.is_stream:
438433
temp_file_path = block.move_resource_to_block_cache(payload.path)
439434
if temp_file_path is None:
@@ -462,6 +457,52 @@ def _move_to_cache(d: dict):
462457
return client_utils.traverse(data, _move_to_cache, client_utils.is_file_obj)
463458

464459

460+
def _check_allowed(path: str | Path, check_in_upload_folder: bool):
    """
    Raises an InvalidPathError if `path` is not permitted to be moved to the gradio cache.

    The check is skipped when no Blocks instance is set on LocalContext or when that
    instance is not running.

    Parameters:
        path: Local file path that is about to be moved to the cache.
        check_in_upload_folder: If True, only files located in the upload folder are accepted. If False, the app's allowed_paths, the current working directory and the system temp directory are accepted as well.
    Raises:
        InvalidPathError: If the path is in blocked_paths, is outside every accepted location, or is a dotfile in the current working directory that is not covered by allowed_paths.
    """
    blocks = LocalContext.blocks.get()
    if blocks is None or not blocks.is_running:
        return

    abs_path = utils.abspath(path)

    # if check_in_upload_folder=True
    # we are running this during pre-process
    # in which case only files in the upload_folder (cache_dir)
    # are accepted
    allowed_paths = [utils.get_upload_folder()]
    if not check_in_upload_folder:
        allowed_paths += [*blocks.allowed_paths, os.getcwd(), tempfile.gettempdir()]

    is_allowed, reason = utils.is_allowed_file(
        abs_path,
        blocked_paths=blocks.blocked_paths,
        allowed_paths=allowed_paths,
    )
    if not is_allowed:
        msg = f"Cannot move {abs_path} to the gradio cache dir because "
        if reason == "in_blocklist":
            # blocked_paths may hold Path objects, so stringify before joining.
            blocked = ", ".join(str(p) for p in blocks.blocked_paths)
            msg += f"it is located in one of the blocked_paths ({blocked})."
        elif check_in_upload_folder:
            msg += "it was not uploaded by a user."
        else:
            msg += "it was not created by the application or it is not "
            msg += "located in either the current working directory or your system's temp directory. "
            msg += "To fix this error, please ensure your function returns files located in either "
            msg += f"the current working directory ({os.getcwd()}), your system's temp directory ({tempfile.gettempdir()}) "
            msg += f"or add {str(abs_path.parent)} to the allowed_paths parameter of launch()."
        raise InvalidPathError(msg)

    # Dotfiles in the working directory are only accepted when the user
    # explicitly listed them (or a parent directory) in allowed_paths.
    explicitly_allowed = any(
        is_in_or_equal(abs_path, allowed_path) for allowed_path in blocks.allowed_paths
    )
    if (
        is_in_or_equal(abs_path, os.getcwd())
        and abs_path.name.startswith(".")
        and not explicitly_allowed
    ):
        raise InvalidPathError(
            "Dotfiles located in the current working directory cannot be moved to the cache for security reasons. "
            "If you'd like to specifically allow this file to be served, you can add it to the allowed_paths parameter of launch()."
        )
504+
505+
465506
async def async_move_files_to_cache(
466507
data: Any,
467508
block: Block,
@@ -494,14 +535,8 @@ async def _move_to_cache(d: dict):
494535
pass
495536
elif not block.proxy_url:
496537
# If the file is on a remote server, do not move it to cache.
497-
if check_in_upload_folder and not client_utils.is_http_url_like(
498-
payload.path
499-
):
500-
path = os.path.abspath(payload.path)
501-
if not is_in_or_equal(path, get_upload_folder()):
502-
raise ValueError(
503-
f"File {path} is not in the upload folder and cannot be accessed."
504-
)
538+
if not client_utils.is_http_url_like(payload.path):
539+
_check_allowed(payload.path, check_in_upload_folder)
505540
if not payload.is_stream:
506541
temp_file_path = await block.async_move_resource_to_block_cache(
507542
payload.path

gradio/routes.py

Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -547,44 +547,21 @@ async def file(path_or_url: str, request: fastapi.Request):
547547
raise HTTPException(403, f"File not allowed: {path_or_url}.")
548548

549549
abs_path = utils.abspath(path_or_url)
550-
551-
in_blocklist = any(
552-
utils.is_in_or_equal(abs_path, blocked_path)
553-
for blocked_path in blocks.blocked_paths
554-
)
555-
556-
is_dir = abs_path.is_dir()
557-
558-
if is_dir or in_blocklist:
550+
if abs_path.is_dir() or not abs_path.exists():
559551
raise HTTPException(403, f"File not allowed: {path_or_url}.")
560552

561-
created_by_app = False
562-
for temp_file_set in blocks.temp_file_sets:
563-
if abs_path in temp_file_set:
564-
created_by_app = True
565-
break
566-
in_allowlist = any(
567-
utils.is_in_or_equal(abs_path, allowed_path)
568-
for allowed_path in blocks.allowed_paths
569-
)
570-
is_static_file = utils.is_static_file(abs_path)
571-
was_uploaded = utils.is_in_or_equal(abs_path, app.uploaded_file_dir)
572-
is_cached_example = utils.is_in_or_equal(
573-
abs_path, utils.abspath(utils.get_cache_folder())
574-
)
553+
from gradio.data_classes import _StaticFiles
575554

576-
if not (
577-
created_by_app
578-
or in_allowlist
579-
or was_uploaded
580-
or is_cached_example
581-
or is_static_file
582-
):
555+
allowed, _ = utils.is_allowed_file(
556+
abs_path,
557+
blocked_paths=blocks.blocked_paths,
558+
allowed_paths=blocks.allowed_paths
559+
+ [app.uploaded_file_dir, utils.get_cache_folder()]
560+
+ _StaticFiles.all_paths,
561+
)
562+
if not allowed:
583563
raise HTTPException(403, f"File not allowed: {path_or_url}.")
584564

585-
if not abs_path.exists():
586-
raise HTTPException(404, f"File not found: {path_or_url}.")
587-
588565
range_val = request.headers.get("Range", "").strip()
589566
if range_val.startswith("bytes=") and "-" in range_val:
590567
range_val = range_val[6:]

gradio/utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def swap_blocks(self, demo: Blocks):
119119
# not a new queue
120120
demo._queue = self.running_app.blocks._queue
121121
demo.max_file_size = self.running_app.blocks.max_file_size
122+
demo.is_running = True
122123
self.running_app.state_holder.reset(demo)
123124
self.running_app.blocks = demo
124125

@@ -1487,3 +1488,22 @@ def safe_join(directory: DeveloperPath, path: UserProvidedPath) -> str:
14871488
raise InvalidPathError()
14881489

14891490
return fullpath
1491+
1492+
1493+
def is_allowed_file(
    path: Path,
    blocked_paths: Sequence[str | Path],
    allowed_paths: Sequence[str | Path],
) -> tuple[bool, Literal["in_blocklist", "allowed", "not_created_or_allowed"]]:
    """
    Decides whether `path` may be accessed given a blocklist and an allowlist.

    The blocklist is consulted first, so a path that matches both lists is rejected.

    Parameters:
        path: The path being checked.
        blocked_paths: Paths (files or directories) that must never be accessed.
        allowed_paths: Paths (files or directories) that may be accessed.
    Returns:
        A tuple of (is_allowed, reason). The reason is "in_blocklist" when the path matches blocked_paths, "allowed" when it matches allowed_paths, and "not_created_or_allowed" when it matches neither.
    """
    for blocked_path in blocked_paths:
        if is_in_or_equal(path, blocked_path):
            return False, "in_blocklist"

    for allowed_path in allowed_paths:
        if is_in_or_equal(path, allowed_path):
            return True, "allowed"

    return False, "not_created_or_allowed"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import gradio as gr
2+
3+
4+
def test_download_button_sets_origname():
    """postprocess should expose the base name of the given path as `orig_name`."""
    button = gr.DownloadButton()
    processed = button.postprocess("/home/image.png")
    assert processed.orig_name == "image.png"  # type: ignore

test/test_blocks.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,3 +1802,36 @@ def delete_fn(v):
18021802
)
18031803
finally:
18041804
demo.close()
1805+
1806+
1807+
def test_post_process_file_blocked(connect):
    """
    Files returned by a function are rejected unless they live in an accepted
    location or are listed in allowed_paths; dotfiles in the working directory
    additionally require an explicit allowed_paths entry.
    """
    dotfile = pathlib.Path(".foo.txt")
    file = pathlib.Path(os.getcwd()) / ".." / "file.txt"

    try:
        demo = gr.Interface(lambda s: s, "text", "file")
        with connect(demo, show_error=True) as client:
            _ = client.predict("test/test_files/bus.png")
            # Create the file before entering pytest.raises so that only the
            # predict call is expected to raise.
            file.write_text("Hi")
            with pytest.raises(
                ValueError,
                match="to the gradio cache dir because it was not created by",
            ):
                client.predict(str(file))

        with connect(demo, allowed_paths=[str(file)]) as client:
            _ = client.predict(str(file))

        dotfile.write_text("foo")
        with connect(demo, show_error=True) as client:
            with pytest.raises(ValueError, match="Dotfiles located"):
                _ = client.predict(str(dotfile))

        with connect(demo, allowed_paths=[str(dotfile)]) as client:
            _ = client.predict(str(dotfile))

    finally:
        # Remove both files this test created; `file` lives outside the working
        # directory and would otherwise be left behind.
        for created in (dotfile, file):
            try:
                created.unlink()
            except FileNotFoundError:
                pass

test/test_processing_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ def test_video_has_playable_codecs(self, test_file_dir):
276276
)
277277

278278
def raise_ffmpy_runtime_exception(*args, **kwargs):
279-
raise ffmpy.FFRuntimeError("", "", "", "")
279+
raise ffmpy.FFRuntimeError("", "", "", "") # type: ignore
280280

281281
@pytest.mark.parametrize(
282282
"exception_to_raise", [raise_ffmpy_runtime_exception, KeyError(), IndexError()]

test/test_utils.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66
import warnings
77
from pathlib import Path
8+
from typing import Sequence
89
from unittest.mock import MagicMock, patch
910

1011
import numpy as np
@@ -31,6 +32,7 @@
3132
get_function_params,
3233
get_type_hints,
3334
ipython_check,
35+
is_allowed_file,
3436
is_in_or_equal,
3537
is_special_typed_parameter,
3638
kaggle_check,
@@ -376,7 +378,7 @@ def create_path_string():
376378
return st.lists(
377379
st.one_of(
378380
st.text(
379-
alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
381+
alphabet="abcd",
380382
min_size=1,
381383
),
382384
st.just(".."),
@@ -387,6 +389,10 @@ def create_path_string():
387389
).map(lambda x: os.path.join(*x))
388390

389391

392+
def create_path_list():
    """Hypothesis strategy that yields lists of zero to five generated path strings."""
    single_path = create_path_string()
    return st.lists(single_path, min_size=0, max_size=5)
394+
395+
390396
def my_check(path_1, path_2):
391397
try:
392398
path_1 = Path(path_1).resolve()
@@ -414,6 +420,54 @@ def test_is_in_or_equal_fuzzer(path_1, path_2):
414420
pytest.fail(f"Exception raised: {e}")
415421

416422

423+
@settings(derandomize=os.getenv("CI") is not None)
@given(
    path=create_path_string(),
    blocked_paths=create_path_list(),
    allowed_paths=create_path_list(),
)
def test_is_allowed_file_fuzzer(
    path: str,
    blocked_paths: Sequence[str],
    allowed_paths: Sequence[str],
):
    """
    Property test: the (result, reason) pair returned by is_allowed_file must agree
    with what is_in_or_equal reports for the blocklist and the allowlist.
    """
    result, reason = is_allowed_file(path, blocked_paths, allowed_paths)

    assert isinstance(result, bool)
    # Only the reasons is_allowed_file can actually return are accepted.
    assert reason in ("in_blocklist", "allowed", "not_created_or_allowed")

    in_blocklist = any(
        is_in_or_equal(path, blocked_path) for blocked_path in blocked_paths
    )
    in_allowlist = any(
        is_in_or_equal(path, allowed_path) for allowed_path in allowed_paths
    )

    if reason == "in_blocklist":
        assert not result
        assert in_blocklist
    elif reason == "allowed":
        assert result
        assert in_allowlist
        # The blocklist is checked first, so an allowed path is never blocked.
        assert not in_blocklist
    else:
        assert not result
        assert not in_blocklist
        assert not in_allowlist
455+
456+
457+
@pytest.mark.parametrize(
    "path,blocked_paths,allowed_paths,result",
    [
        ("/a/foo.txt", ["/a"], ["/b"], False),
        ("/b/foo.txt", ["/a"], ["/b"], True),
        ("/a/../c/foo.txt", ["/c/"], ["/a/"], False),
        ("/c/../a/foo.txt", ["/c/"], ["/a/"], True),
        # is_allowed_file checks the blocklist before the allowlist, so a blocked
        # directory wins even when the file itself is listed in allowed_paths.
        ("/c/foo.txt", ["/c/"], ["/c/foo.txt"], False),
    ],
)
def test_is_allowed_file_corner_cases(path, blocked_paths, allowed_paths, result):
    """Known edge cases for is_allowed_file: `..` segments and overlapping lists."""
    # is_allowed_file returns (is_allowed, reason); only the boolean is compared.
    allowed, _ = is_allowed_file(path, blocked_paths, allowed_paths)
    assert allowed == result
469+
470+
417471
# Additional test for known edge cases
418472
@pytest.mark.parametrize(
419473
"path_1,path_2,expected",

0 commit comments

Comments
 (0)