Skip to content

Commit 953418e

Browse files
authored
Merge pull request from GHSA-5xvc-vgmp-jgc3
Check for non-normalized usernames in password db on startup
2 parents 9e3db9c + a8e06c5 commit 953418e

File tree

2 files changed

+293
-18
lines changed

2 files changed

+293
-18
lines changed

firstuseauthenticator/firstuseauthenticator.py

+173-16
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
password for that account. It is hashed with bcrypt & stored
66
locally in a dbm file, and checked next time they log in.
77
"""
8-
import dbm
98
import os
9+
import shutil
10+
11+
import bcrypt
12+
import dbm
1013
from jinja2 import ChoiceLoader, FileSystemLoader
1114
from jupyterhub.auth import Authenticator
1215
from jupyterhub.handlers import BaseHandler
@@ -16,8 +19,6 @@
1619
from tornado import web
1720
from traitlets.traitlets import Unicode, Bool, Integer
1821

19-
import bcrypt
20-
2122

2223
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'templates')
2324

@@ -45,7 +46,6 @@ def __init__(self, *args, **kwargs):
4546
self._loaded = False
4647
super().__init__(*args, **kwargs)
4748

48-
4949
def _register_template_path(self):
5050
if self._loaded:
5151
return
@@ -59,14 +59,12 @@ def _register_template_path(self):
5959

6060
self._loaded = True
6161

62-
6362
@web.authenticated
6463
async def get(self):
6564
self._register_template_path()
6665
html = await self.render_template('reset.html')
6766
self.finish(html)
6867

69-
7068
@web.authenticated
7169
async def post(self):
7270
user = self.current_user
@@ -120,6 +118,164 @@ class FirstUseAuthenticator(Authenticator):
120118
"""
121119
)
122120

121+
check_passwords_on_startup = Bool(
122+
True,
123+
config=True,
124+
help="""
125+
Check for non-normalized-username passwords on startup.
126+
127+
Prior to 1.0, multiple passwords could be set for the same username,
128+
without normalization.
129+
130+
When True, duplicate usernames will be detected and removed,
131+
and ensure all usernames are normalized.
132+
133+
If any duplicates are found, a backup of the original is created,
134+
which can be inspected manually.
135+
136+
Typically, this will only need to run once.
137+
""",
138+
)
139+
140+
def __init__(self, **kwargs):
141+
super().__init__(**kwargs)
142+
if self.check_passwords_on_startup:
143+
self._check_passwords()
144+
145+
def _check_passwords(self):
146+
"""Validation checks on the password database at startup
147+
148+
Mainly checks for the presence of passwords for non-normalized usernames
149+
150+
If a username is present only in one non-normalized form,
151+
it will be renamed to the normalized form.
152+
153+
If multiple forms of the same normalized username are present,
154+
ensure that at least the normalized form is also present.
155+
It will continue to produce warnings until manual intervention removes the non-normalized entries.
156+
157+
Non-normalized entries will never be used during login.
158+
"""
159+
160+
# it's nontrival to check for db existence, because there are so many extensions
161+
# and you don't give dbm a path, you give it a *base* name,
162+
# which may point to one or more paths.
163+
# There's no way to retrieve the actual path(s) for a db
164+
dbm_extensions = ("", ".db", ".pag", ".dir", ".dat", ".bak")
165+
dbm_files = list(
166+
filter(os.path.isfile, (self.dbm_path + ext for ext in dbm_extensions))
167+
)
168+
if not dbm_files:
169+
# no database, nothing to do
170+
return
171+
172+
backup_path = self.dbm_path + "-backup"
173+
backup_files = list(
174+
filter(os.path.isfile, (backup_path + ext for ext in dbm_extensions))
175+
)
176+
177+
collision_warning = (
178+
f"Duplicate password entries have been found, and stored in {backup_path!r}."
179+
f" Duplicate entries have been removed from {self.dbm_path!r}."
180+
f" If you are happy with the solution, you can delete the backup file(s): {' '.join(backup_files)}."
181+
" Or you can inspect the backup database with:\n"
182+
" import dbm\n"
183+
f" with dbm.open({backup_path!r}, 'r') as db:\n"
184+
" for username in db.keys():\n"
185+
" print(username, db[username])\n"
186+
)
187+
188+
if backup_files:
189+
self.log.warning(collision_warning)
190+
return
191+
192+
# create a temporary backup of the passwords db
193+
# to be retained only if collisions are detected
194+
# or deleted if no collisions are detected
195+
backup_files = []
196+
for path in dbm_files:
197+
base, ext = os.path.splitext(path)
198+
if ext not in dbm_extensions:
199+
# catch weird names with '.' and no .db extension
200+
base = path
201+
ext = ""
202+
backup = f"{base}-backup{ext}"
203+
shutil.copyfile(path, backup)
204+
backup_files.append(backup)
205+
206+
collision_found = False
207+
208+
with dbm.open(self.dbm_path, "w") as db:
209+
# load the username:hashed_password dict
210+
passwords = {}
211+
for key in db.keys():
212+
passwords[key.decode("utf8")] = db[key]
213+
214+
# normalization map
215+
# compute the full map before checking in case two non-normalized forms are used
216+
# keys are normalized usernames,
217+
# values are lists of all names present in the db
218+
# which normalize to the same user
219+
normalized_usernames = {}
220+
for username in passwords:
221+
normalized_username = self.normalize_username(username)
222+
normalized_usernames.setdefault(normalized_username, []).append(
223+
username
224+
)
225+
226+
# check if any non-normalized usernames are in the db
227+
for normalized_username, usernames in normalized_usernames.items():
228+
# case 1. only one form, make sure it's stored in the normalized username
229+
if len(usernames) == 1:
230+
username = usernames[0]
231+
# case 1.a only normalized form, nothing to do
232+
if username == normalized_username:
233+
continue
234+
# 1.b only one form, not normalized. Unambiguous to fix.
235+
# move password from non-normalized to normalized.
236+
self.log.warning(
237+
f"Normalizing username in password db {username}->{normalized_username}"
238+
)
239+
db[normalized_username.encode("utf8")] = passwords[username]
240+
del db[username]
241+
else:
242+
# collision! Multiple passwords for the same Hub user with different normalization
243+
# do not clear these automatically because the 'right' answer is ambiguous,
244+
# but make sure the normalized_username is set,
245+
# so that after upgrade, there is always a password set
246+
# the non-normalized username passwords will never be used
247+
# after jupyterhub-firstuseauthenticator 1.0
248+
self.log.warning(
249+
f"{len(usernames)} variations of the username {normalized_username} present in password database: {usernames}."
250+
f" Only the password stored for the normalized {normalized_username} will be used."
251+
)
252+
collision_found = True
253+
if normalized_username not in passwords:
254+
# we choose usernames[0] as most likely to be the first entry
255+
# this isn't guaranteed, but it's the best information we have
256+
username = usernames[0]
257+
self.log.warning(
258+
f"Normalizing username in password db {username}->{normalized_username}"
259+
)
260+
db[normalized_username.encode("utf8")] = passwords[username]
261+
for username in usernames:
262+
if username != normalized_username:
263+
self.log.warning(
264+
f"Removing un-normalized username from password db {username}"
265+
)
266+
del db[username]
267+
268+
if collision_found:
269+
self.log.warning(collision_warning)
270+
else:
271+
# remove backup files, if we didn't find anything to backup
272+
self.log.debug(f"No collisions found, removing backup files {backup_files}")
273+
for path in backup_files:
274+
try:
275+
os.remove(path)
276+
except FileNotFoundError:
277+
pass
278+
123279
def _user_exists(self, username):
124280
"""
125281
Return true if given user already exists.
@@ -141,19 +297,19 @@ def validate_username(self, name):
141297
return super().validate_username(name)
142298

143299
async def authenticate(self, handler, data):
144-
username = self.normalize_username(data['username'])
145-
password = data['password']
300+
username = self.normalize_username(data["username"])
301+
password = data["password"]
146302

147303
if not self.create_users:
148304
if not self._user_exists(username):
149305
return None
150306

151307
with dbm.open(self.dbm_path, 'c', 0o600) as db:
152-
stored_pw = db.get(username.encode(), None)
308+
stored_pw = db.get(username.encode("utf8"), None)
153309

154310
if stored_pw is not None:
155311
# for existing passwords: ensure password hash match
156-
if bcrypt.hashpw(password.encode(), stored_pw) != stored_pw:
312+
if bcrypt.hashpw(password.encode("utf8"), stored_pw) != stored_pw:
157313
return None
158314
else:
159315
# for new users: ensure password validity and store password hash
@@ -164,7 +320,7 @@ async def authenticate(self, handler, data):
164320
)
165321
self.log.error(handler.custom_login_error)
166322
return None
167-
db[username] = bcrypt.hashpw(password.encode(), bcrypt.gensalt())
323+
db[username] = bcrypt.hashpw(password.encode("utf8"), bcrypt.gensalt())
168324

169325
return username
170326

@@ -181,7 +337,6 @@ def delete_user(self, user):
181337
except KeyError:
182338
pass
183339

184-
185340
def reset_password(self, username, new_password):
186341
"""
187342
This allows changing the password of a logged user.
@@ -194,12 +349,14 @@ def reset_password(self, username, new_password):
194349
self.log.error(login_err)
195350
# Resetting the password will fail if the new password is too short.
196351
return login_err
197-
with dbm.open(self.dbm_path, 'c', 0o600) as db:
198-
db[username] = bcrypt.hashpw(new_password.encode(), bcrypt.gensalt())
352+
with dbm.open(self.dbm_path, "c", 0o600) as db:
353+
db[username] = bcrypt.hashpw(new_password.encode("utf8"), bcrypt.gensalt())
199354
login_msg = "Your password has been changed successfully!"
200355
self.log.info(login_msg)
201356
return login_msg
202357

203-
204358
def get_handlers(self, app):
205-
return [(r'/login', CustomLoginHandler), (r'/auth/change-password', ResetPasswordHandler)]
359+
return [
360+
(r"/login", CustomLoginHandler),
361+
(r"/auth/change-password", ResetPasswordHandler),
362+
]

0 commit comments

Comments
 (0)