Skip to content

Commit 71ed381

Browse files
committed
Issue webcompat#767: fixes normalize_url bug when url is missing slashes
1 parent d336e69 commit 71ed381

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

tests/test_form.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
'''Tests for form validation.'''
5+
6+
import unittest
7+
from webcompat import form
8+
9+
10+
class TestForm(unittest.TestCase):
    '''Checks for the URL helper functions in webcompat.form.'''

    def test_normalize_url(self):
        '''normalize_url repairs a missing scheme, slashes and path.'''
        # (input, expected) pairs, checked in order.
        expectations = (
            ('example.com', 'http://example.com/'),
            ('http:/example.com', 'http://example.com/'),
            ('https:/example.com', 'https://example.com/'),
            ('http:example.com', 'http://example.com/'),
            ('https:example.com', 'https://example.com/'),
        )
        for raw, wanted in expectations:
            self.assertEqual(form.normalize_url(raw), wanted)

    def test_domain_name(self):
        '''domain_name gives the bare host for http and https URLs.'''
        for raw in ("http://example.com", "https://example.com"):
            self.assertEqual(form.domain_name(raw), "example.com")

webcompat/form.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
AUTH_REPORT = 'github-auth-report'
2626
PROXY_REPORT = 'github-proxy-report'
2727
# URL prefixes that carry a complete http/https scheme separator.
SCHEMES = ('http://', 'https://')
28+
# http/https prefixes that are missing one or both slashes; normalize_url()
# tests URLs against these with startswith(). Note that each entry is also a
# prefix of the corresponding well-formed prefix in SCHEMES.
BAD_SCHEMES = ('http:/', 'https:/', 'http:', 'https:')
2829

2930
problem_choices = [
3031
(u'detection_bug', u'Desktop site instead of mobile site'),
@@ -118,9 +119,25 @@ def get_labels(browser_name):
118119
def normalize_url(url):
    '''Normalize a user-supplied URL for consistency.

    Surrounding whitespace is stripped, then:

    * a URL whose http/https scheme is missing one or both slashes
      (``http:/example.com``, ``https:example.com``) is rebuilt with a
      proper ``scheme://`` separator, keeping its query and fragment;
    * a URL with no scheme at all is assumed to be http;
    * a URL with an empty path gets ``/`` as its path.

    Returns the normalized URL as a string.
    '''
    url = url.strip()
    parsed = urlparse.urlparse(url)

    # Every BAD_SCHEMES entry is also a prefix of a well-formed
    # 'http://...' / 'https://...' URL, so a startswith() match alone is
    # not enough: only repair when the parser found no network location.
    # Otherwise the host held in parsed.netloc would be thrown away.
    if url.startswith(BAD_SCHEMES) and not parsed.netloc:
        # With a malformed scheme separator parsed.netloc is empty and the
        # host sits at the start of parsed.path, so rebuild from the path.
        path = parsed.path.lstrip('/')
        url = '%s://%s' % (parsed.scheme, path)
        if parsed.query:
            url += '?' + parsed.query
        if parsed.fragment:
            url += '#' + parsed.fragment
    elif not parsed.scheme:
        # We assume that http is missing not https
        url = 'http://%s' % (url)

    # If the URL has no path, give it '/'. The path is set on the parsed
    # components rather than appended to the string, because appending
    # would put the slash after any query string or fragment.
    pieces = urlparse.urlparse(url)
    if not pieces.path:
        url = urlparse.urlunparse(pieces._replace(path='/'))

    return url
125142

126143

0 commit comments

Comments
 (0)