Skip to content

Commit cedbdeb

Browse files
committed
middleware cleanup - no trailing slashes on canonical urls
1 parent b7e99bb commit cedbdeb

File tree

1 file changed

+88
-140
lines changed

1 file changed

+88
-140
lines changed

openstax/middleware.py

+88-140
Original file line numberDiff line numberDiff line change
@@ -14,189 +14,137 @@
1414
from pages.models import HomePage, Supporters, PrivacyPolicy, K12Subject, Subject, Subjects, RootPage
1515

1616

17-
class HttpSmartRedirectResponse(HttpResponsePermanentRedirect):
18-
pass
19-
20-
2117
class CommonMiddlewareAppendSlashWithoutRedirect(CommonMiddleware):
22-
""" This class converts HttpSmartRedirectResponse to the common response
23-
of Django view, without redirect. This is necessary to match status_codes
24-
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
25-
code always on pages without slash.
18+
""" This class converts HttpResponsePermanentRedirect to the common response
19+
of Django view, without redirect. This is necessary to match status_codes
20+
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
21+
code always on pages without slash.
2622
"""
27-
response_redirect_class = HttpSmartRedirectResponse
23+
response_redirect_class = HttpResponsePermanentRedirect
2824

29-
def __init__(self, *args, **kwargs):
30-
# create django request resolver
31-
self.handler = BaseHandler()
25+
def __init__(self, *args, **kwargs):
26+
# create django request resolver
27+
self.handler = BaseHandler()
3228

33-
# prevent recursive includes
34-
old = settings.MIDDLEWARE
35-
name = self.__module__ + '.' + self.__class__.__name__
36-
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]
29+
# prevent recursive includes
30+
old = settings.MIDDLEWARE
31+
name = self.__module__ + '.' + self.__class__.__name__
32+
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]
3733

38-
self.handler.load_middleware()
34+
self.handler.load_middleware()
3935

40-
settings.MIDDLEWARE = old
41-
super(CommonMiddlewareAppendSlashWithoutRedirect, self).__init__(*args, **kwargs)
36+
settings.MIDDLEWARE = old
37+
super().__init__(*args, **kwargs)
4238

43-
def get_full_path_with_slash(self, request):
44-
""" Return the full path of the request with a trailing slash appended
45-
without Exception in Debug mode
46-
"""
47-
new_path = request.get_full_path(force_append_slash=True)
48-
# Prevent construction of scheme relative urls.
49-
new_path = escape_leading_slashes(new_path)
50-
return new_path
39+
def get_full_path_with_slash(self, request):
40+
""" Return the full path of the request with a trailing slash appended
41+
without Exception in Debug mode
42+
"""
43+
# Prevent construction of scheme relative urls.
44+
new_path = request.get_full_path(force_append_slash=True)
45+
new_path = escape_leading_slashes(new_path)
46+
return new_path
5147

52-
def process_response(self, request, response):
53-
response = super(CommonMiddlewareAppendSlashWithoutRedirect, self).process_response(request, response)
48+
def process_response(self, request, response):
49+
response = super().process_response(request, response)
5450

55-
if isinstance(response, HttpSmartRedirectResponse):
56-
if not request.path.endswith('/'):
57-
request.path = request.path + '/'
58-
# we don't need query string in path_info because it's in request.GET already
59-
request.path_info = request.path
60-
response = self.handler.get_response(request)
51+
if isinstance(response, HttpResponsePermanentRedirect):
52+
if not request.path.endswith('/'):
53+
request.path = request.path + '/'
54+
# we don't need query string in path_info because it's in request.GET already
55+
response = self.handler.get_response(request)
6156

62-
return response
57+
return response
6358

6459

6560
class CommonMiddlewareOpenGraphRedirect(CommonMiddleware):
66-
OG_USER_AGENTS = [
67-
'baiduspider',
68-
'bingbot',
69-
'embedly',
70-
'facebookbot',
71-
'facebookexternalhit/1.1',
72-
'facebookexternalhit',
73-
'facebot',
74-
'google.*snippet',
75-
'googlebot',
76-
'linkedinbot',
77-
'MetadataScraper',
78-
'outbrain',
79-
'pinterest',
80-
'pinterestbot',
81-
'quora',
82-
'quora link preview',
83-
'rogerbot',
84-
'showyoubot',
85-
'slackbot',
86-
'slackbot-linkexpanding',
87-
'twitterbot',
88-
'vkShare',
89-
'W3C_Validator',
90-
'WhatsApp',
91-
'MetadataScraper',
92-
'yandex',
93-
'yahoo',
94-
]
61+
OG_USER_AGENTS = {
62+
'baiduspider', 'bingbot', 'embedly', 'facebookbot', 'facebookexternalhit/1.1',
63+
'facebookexternalhit', 'facebot', 'google.*snippet', 'googlebot', 'linkedinbot',
64+
'MetadataScraper', 'outbrain', 'pinterest', 'pinterestbot', 'quora', 'quora link preview',
65+
'rogerbot', 'showyoubot', 'slackbot', 'slackbot-linkexpanding', 'twitterbot', 'vkShare',
66+
'W3C_Validator', 'WhatsApp', 'yandex', 'yahoo'
67+
}
9568

9669
def __init__(self, get_response):
9770
self.get_response = get_response
9871

9972
def __call__(self, request, *args, **kwargs):
10073
if 'HTTP_USER_AGENT' in request.META:
101-
10274
user_agent = user_agent_parser.ParseUserAgent(request.META["HTTP_USER_AGENT"])
10375
if user_agent['family'].lower() in self.OG_USER_AGENTS:
104-
# url path minus the trailing /
10576
url_path = unquote(request.get_full_path()[:-1])
106-
10777
full_url = unquote(request.build_absolute_uri())
108-
109-
# index of last / to find slug, except when there isn't a last /
110-
if url_path == '':
111-
page_slug = "home"
112-
else:
113-
index = url_path.rindex('/')
114-
page_slug = url_path[index+1:]
78+
page_slug = "home" if url_path == '' else url_path.rsplit('/', 1)[-1]
11579

11680
if self.redirect_path_found(url_path):
117-
# supporters page has the wrong slug
11881
if page_slug == 'foundation':
11982
page_slug = 'supporters'
12083

121-
# look up correct object based on path
122-
if '/details/books/' in url_path:
123-
page = Book.objects.filter(slug=page_slug)
124-
elif '/blog/' in url_path:
125-
page = NewsArticle.objects.filter(slug=page_slug)
126-
elif '/privacy' in url_path:
127-
page = PrivacyPolicy.objects.filter(slug='privacy-policy')
128-
elif '/k12' in url_path:
129-
page = K12Subject.objects.filter(slug='k12-' + page_slug)
130-
elif '/subjects' in url_path:
131-
flag = FeatureFlag.objects.filter(name='new_subjects')
132-
if flag[0].feature_active:
133-
if page_slug == 'subjects':
134-
page_slug = 'new-subjects'
135-
page = Subjects.objects.filter(slug=page_slug)
136-
else:
137-
page = Subject.objects.filter(slug=page_slug+'-books')
138-
else:
139-
page_slug = 'subjects'
140-
page = BookIndex.objects.filter(slug=page_slug)
141-
else:
142-
page = self.page_by_slug(page_slug)
143-
84+
page = self.get_page(url_path, page_slug)
14485
if page:
14586
template = self.build_template(page[0], full_url)
14687
return HttpResponse(template)
147-
else:
148-
return self.get_response(request)
149-
else:
150-
return self.get_response(request)
15188
return self.get_response(request)
15289

90+
def get_page(self, url_path, page_slug):
91+
if '/details/books/' in url_path:
92+
return Book.objects.filter(slug=page_slug)
93+
elif '/blog/' in url_path:
94+
return NewsArticle.objects.filter(slug=page_slug)
95+
elif '/privacy' in url_path:
96+
return PrivacyPolicy.objects.filter(slug='privacy-policy')
97+
elif '/k12' in url_path:
98+
return K12Subject.objects.filter(slug='k12-' + page_slug)
99+
elif '/subjects' in url_path:
100+
flag = FeatureFlag.objects.filter(name='new_subjects')
101+
if flag[0].feature_active:
102+
if page_slug == 'subjects':
103+
page_slug = 'new-subjects'
104+
return Subjects.objects.filter(slug=page_slug)
105+
else:
106+
return Subject.objects.filter(slug=page_slug + '-books')
107+
else:
108+
return BookIndex.objects.filter(slug='subjects')
109+
else:
110+
return self.page_by_slug(page_slug)
111+
153112
def build_template(self, page, page_url):
113+
page_url = page_url.rstrip('/')
154114
image_url = self.image_url(page.promote_image)
155-
template = '<!DOCTYPE html> <html> <head> <meta charset="utf-8">\n'
156-
template += ' <title>' + str(page.title) + '</title>\n'
157-
template += ' <meta name="description" content="{}" >\n'.format(page.search_description)
158-
template += ' <link rel="canonical" href="{}" />\n'.format(page_url)
159-
template += ' <meta property="og:url" content="{}" />\n'.format(page_url)
160-
template += ' <meta property="og:type" content="article" />\n'
161-
template += ' <meta property="og:title" content="{}" />\n'.format(page.title)
162-
template += ' <meta property="og:description" content="{}" />\n'.format(page.search_description)
163-
template += ' <meta property="og:image" content="{}" />\n'.format(image_url)
164-
template += ' <meta property="og:image:alt" content="{}" />\n'.format(page.title)
165-
template += ' <meta name="twitter:card" content="summary_large_image">\n'
166-
template += ' <meta name="twitter:site" content="@OpenStax">\n'
167-
template += ' <meta name="twitter:title" content="{}">\n'.format(page.title)
168-
template += ' <meta name="twitter:description" content="{}">\n'.format(page.search_description)
169-
template += ' <meta name="twitter:image" content="{}">\n'.format(image_url)
170-
template += ' <meta name="twitter:image:alt" content="OpenStax">\n'
171-
template += '</head><body></body></html>'
172-
return template
115+
return f'''<!DOCTYPE html>
116+
<html>
117+
<head>
118+
<meta charset="utf-8">
119+
<title>{page.title}</title>
120+
<meta name="description" content="{page.search_description}">
121+
<link rel="canonical" href="{page_url}">
122+
<meta property="og:url" content="{page_url}">
123+
<meta property="og:type" content="article">
124+
<meta property="og:title" content="{page.title}">
125+
<meta property="og:description" content="{page.search_description}">
126+
<meta property="og:image" content="{image_url}">
127+
<meta property="og:image:alt" content="{page.title}">
128+
<meta name="twitter:card" content="summary_large_image">
129+
<meta name="twitter:site" content="@OpenStax">
130+
<meta name="twitter:title" content="{page.title}">
131+
<meta name="twitter:description" content="{page.search_description}">
132+
<meta name="twitter:image" content="{image_url}">
133+
<meta name="twitter:image:alt" content="OpenStax">
134+
</head>
135+
<body></body>
136+
</html>'''
173137

174138
def redirect_path_found(self, url_path):
175-
if '/blog/' in url_path or '/details/books/' in url_path or '/foundation' in url_path or '/privacy' in url_path or '/subjects' in url_path or '' == url_path:
176-
return True
177-
elif '/k12' in url_path:
178-
last_slash = url_path.rfind('/')
179-
k12_index = url_path.rfind('k12')
180-
if last_slash < k12_index:
181-
return False
182-
else:
183-
return True
184-
else:
185-
return False
139+
return any(substring in url_path for substring in ['/blog/', '/details/books/', '/foundation', '/privacy', '/subjects', '']) or '/k12' in url_path
186140

187141
def image_url(self, image):
188-
image_url = build_image_url(image)
189-
if not image_url:
190-
return ''
191-
return image_url
142+
return build_image_url(image) or ''
192143

193144
def page_by_slug(self, page_slug):
194145
if page_slug == 'supporters':
195146
return Supporters.objects.all()
196147
if page_slug == 'openstax-homepage':
197148
return HomePage.objects.filter(locale=1)
198149
if page_slug == 'home':
199-
return RootPage.objects.filter(locale=1)
200-
201-
202-
150+
return RootPage.objects.filter(locale=1)

0 commit comments

Comments
 (0)