Skip to content

Add method to sanitize urls #218

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions tests/test_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,36 @@ def test_setting_destination_and_upload(self):
"MAM/TO_OE",
"MAM/TO_OE/pending",
)

class TestSanitizeURL:
    """
    Checks that `urls.sanitize_url` keeps only the scheme, host and port of a URL.
    """

    def test_removes_query_params(self):
        """
        A query string is dropped from the result.
        """
        expected = "https://example.com/"
        assert urls.sanitize_url("https://example.com/path?query=param") == expected

    def test_removes_fragment(self):
        """
        A fragment is dropped from the result.
        """
        expected = "https://example.com/"
        assert urls.sanitize_url("https://example.com/path#fragment") == expected

    def test_removes_auth(self):
        """
        Credentials are dropped from the result while the port is kept.
        """
        expected = "https://localhost:8080/"
        assert urls.sanitize_url("https://user:test@localhost:8080/path") == expected

    def test_removes_path(self):
        """
        The path is collapsed to a single trailing slash.
        """
        expected = "https://example.com/"
        assert urls.sanitize_url("https://example.com/path/") == expected
29 changes: 28 additions & 1 deletion xocto/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import os
from urllib import parse

from urllib3 import exceptions as urllib3_exceptions
from urllib3 import util as urllib3_util

def pop_url_query_param(url: str, key: str) -> tuple[str, str | None]:
"""
Expand Down Expand Up @@ -144,3 +145,29 @@ def _fix_url_scheme(*, old_url: str, new_url: str) -> str:
segments = new_url.split(":", maxsplit=1)
new_url = segments[0] + "://" + segments[1]
return new_url

def sanitize_url(url: str) -> str | None:
    """
    Sanitizes the URL by reducing it to its scheme, host and port, followed by a single "/".

    The auth, path, query and fragment parts of the URL are discarded. A URL without a scheme
    is still accepted; the result then starts directly at the host.

    `None` is returned if urllib3 cannot parse the URL (`LocationParseError`) or if no host
    can be extracted from it.

    NOTE(review): a bare token such as 'invalid-url' has no scheme, so it appears to be treated
    as a host rather than rejected - confirm against urllib3's `parse_url` behaviour.

    E.g.
    >>> sanitize_url('https://user:pass@localhost:8080/path?query#fragment')
    'https://localhost:8080/'
    >>> sanitize_url('ftp://example.com:21')
    'ftp://example.com:21/'
    >>> sanitize_url('') is None
    True
    """
    # Only the parse call can raise, so keep the `try` body limited to that single call.
    try:
        parsed = urllib3_util.parse_url(url)
    except urllib3_exceptions.LocationParseError:
        return None

    if parsed.host is None:
        return None

    # Build the optional pieces under new names instead of rebinding the parsed values
    # (the port is an int, the rendered suffix is a str).
    scheme_prefix = f"{parsed.scheme}://" if parsed.scheme is not None else ""
    port_suffix = f":{parsed.port}" if parsed.port is not None else ""

    return f"{scheme_prefix}{parsed.host}{port_suffix}/"