Skip to content

Commit 49ca39c

Browse files
authored
Merge pull request #226 from appsembler/john/improve-enroll-metrics-pipeline-perf
Updates pipeline enrollment metrics queries to improve performance
2 parents d96dd9b + d596ffa commit 49ca39c

File tree

2 files changed

+88
-28
lines changed

2 files changed

+88
-28
lines changed

figures/pipeline/enrollment_metrics.py

+14-9
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656
from figures.pipeline.logger import log_error
5757
from figures.sites import (
5858
get_site_for_course,
59+
get_student_modules_for_course_in_site,
5960
course_enrollments_for_course,
60-
student_modules_for_course_enrollment,
6161
UnlinkedCourseError,
6262
)
6363

@@ -81,8 +81,16 @@ def bulk_calculate_course_progress_data(course_id, date_for=None):
8181
if not date_for:
8282
date_for = datetime.utcnow().replace(tzinfo=utc).date()
8383

84+
site = get_site_for_course(course_id)
85+
if not site:
86+
raise UnlinkedCourseError('No site found for course "{}"'.format(course_id))
87+
88+
course_sm = get_student_modules_for_course_in_site(site=site,
89+
course_id=course_id)
8490
for ce in course_enrollments_for_course(course_id):
85-
metrics = collect_metrics_for_enrollment(course_enrollment=ce,
91+
metrics = collect_metrics_for_enrollment(site=site,
92+
course_enrollment=ce,
93+
course_sm=course_sm,
8694
date_for=date_for)
8795
if metrics:
8896
progress_percentages.append(metrics.progress_percent)
@@ -118,7 +126,7 @@ def calculate_average_progress(progress_percentages):
118126
return average_progress
119127

120128

121-
def collect_metrics_for_enrollment(course_enrollment, date_for=None, **_kwargs):
129+
def collect_metrics_for_enrollment(site, course_enrollment, course_sm, date_for=None):
122130
"""Collect metrics for enrollment (learner+course)
123131
124132
This function performs course enrollment merics collection for the given
@@ -144,17 +152,14 @@ def collect_metrics_for_enrollment(course_enrollment, date_for=None, **_kwargs):
144152
"""
145153
if not date_for:
146154
date_for = datetime.utcnow().replace(tzinfo=utc).date()
147-
site = get_site_for_course(course_enrollment.course_id)
148-
if not site:
149-
raise UnlinkedCourseError('No site found for course "{}"'.format(
150-
course_enrollment.course_id))
151155

152156
# The following are two different ways to avoide the dreaded error
153157
# "Instance of 'list' has no 'order_by' member (no-member)"
154158
# See: https://github.com/PyCQA/pylint-django/issues/165
155-
student_modules = student_modules_for_course_enrollment(ce=course_enrollment)
159+
student_modules = course_sm.filter(
160+
student_id=course_enrollment.user.id).order_by('-modified')
156161
if student_modules:
157-
most_recent_sm = student_modules.latest('modified')
162+
most_recent_sm = student_modules[0]
158163
else:
159164
most_recent_sm = None
160165

tests/pipeline/test_enrollment_metrics.py

+74-19
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
from courseware.models import StudentModule
1010

11+
from figures.helpers import is_multisite
1112
from figures.models import LearnerCourseGradeMetrics
12-
1313
from figures.pipeline.enrollment_metrics import (
1414
calculate_average_progress,
1515
bulk_calculate_course_progress_data,
@@ -18,6 +18,7 @@
1818
_add_enrollment_metrics_record,
1919
_collect_progress_data,
2020
)
21+
from figures.sites import UnlinkedCourseError
2122

2223
from tests.factories import (
2324
CourseEnrollmentFactory,
@@ -49,12 +50,56 @@ def test_bulk_calculate_course_progress_data_happy_path(db, monkeypatch):
4950
def mock_metrics(course_enrollment, **_kwargs):
5051
return mapping[course_enrollment.course_id]
5152

53+
monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
54+
lambda val: SiteFactory())
5255
monkeypatch.setattr('figures.pipeline.enrollment_metrics.collect_metrics_for_enrollment',
5356
mock_metrics)
5457
data = bulk_calculate_course_progress_data(course_overview.id)
5558
assert data['average_progress'] == 0.5
5659

5760

61+
@pytest.mark.skipif(not is_multisite(),
62+
reason='Standalone always returns a site')
63+
@pytest.mark.django_db
64+
def test_bulk_calculate_course_progress_unlinked_course_error(db, monkeypatch):
65+
"""Tests 'bulk_calculate_course_progress_data' function
66+
67+
The function under test iterates over a set of course enrollment records,
68+
So we create a couple of records to iterate over and mock the collect
69+
function
70+
"""
71+
course_overview = CourseOverviewFactory()
72+
course_enrollments = [CourseEnrollmentFactory(
73+
course_id=course_overview.id) for i in range(2)]
74+
mapping = {ce.course_id: LearnerCourseGradeMetricsFactory(
75+
course_id=str(ce.course_id),
76+
user=ce.user,
77+
sections_worked=1,
78+
sections_possible=2) for ce in course_enrollments
79+
}
80+
81+
def mock_metrics(course_enrollment, **_kwargs):
82+
return mapping[course_enrollment.course_id]
83+
84+
monkeypatch.setattr('figures.pipeline.enrollment_metrics.collect_metrics_for_enrollment',
85+
mock_metrics)
86+
with pytest.raises(UnlinkedCourseError) as e_info:
87+
data = bulk_calculate_course_progress_data(course_overview.id)
88+
89+
# assert data['average_progress'] == 0.5
90+
91+
92+
@pytest.mark.django_db
93+
def test_bulk_calculate_course_progress_no_enrollments(db, monkeypatch):
94+
"""This tests when there is a new course with no enrollments
95+
"""
96+
monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
97+
lambda val: SiteFactory())
98+
course_overview = CourseOverviewFactory()
99+
data = bulk_calculate_course_progress_data(course_overview.id)
100+
assert data['average_progress'] == 0.0
101+
102+
58103
@pytest.mark.parametrize('progress_percentages, expected_result', [
59104
(None, 0.0),
60105
([], 0.0),
@@ -88,6 +133,7 @@ class TestCollectMetricsForEnrollment(object):
88133
"""
89134
@pytest.fixture(autouse=True)
90135
def setup(self, db):
136+
self.site = SiteFactory()
91137
self.date_1 = datetime(2020, 2, 2, tzinfo=utc)
92138
self.date_2 = self.date_1 + relativedelta(months=1) # future of date_1
93139
self.course_enrollment = CourseEnrollmentFactory()
@@ -131,12 +177,14 @@ def test_needs_update_no_lcgm(self, monkeypatch):
131177
"""
132178
assert not LearnerCourseGradeMetrics.objects.count()
133179
monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
134-
lambda val: SiteFactory())
180+
lambda val: self.site)
135181
monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
136182
lambda val: self.progress_data)
137-
metrics = collect_metrics_for_enrollment(
138-
course_enrollment=self.course_enrollment,
139-
)
183+
184+
course_sm = StudentModule.objects.filter(course_id=self.course_enrollment.course_id)
185+
metrics = collect_metrics_for_enrollment(site=self.site,
186+
course_enrollment=self.course_enrollment,
187+
course_sm=course_sm)
140188

141189
self.check_response_metrics(metrics, self.progress_data)
142190
assert LearnerCourseGradeMetrics.objects.count() == 1
@@ -150,12 +198,13 @@ def test_needs_update_has_lcgm(self, monkeypatch):
150198
"""
151199
lcgm = self.create_lcgm(date_for=self.date_1)
152200
monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
153-
lambda val: SiteFactory())
201+
lambda val: self.site)
154202
monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
155203
lambda val: self.progress_data)
156-
metrics = collect_metrics_for_enrollment(
157-
course_enrollment=self.course_enrollment,
158-
)
204+
course_sm = StudentModule.objects.filter(course_id=self.course_enrollment.course_id)
205+
metrics = collect_metrics_for_enrollment(site=self.site,
206+
course_enrollment=self.course_enrollment,
207+
course_sm=course_sm)
159208

160209
self.check_response_metrics(metrics, self.progress_data)
161210
assert metrics != lcgm
@@ -173,12 +222,13 @@ def test_does_not_need_update(self, monkeypatch):
173222
"""
174223
lcgm = self.create_lcgm(date_for=self.date_2.date())
175224
monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
176-
lambda val: SiteFactory())
225+
lambda val: self.site)
177226
monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
178227
lambda val: self.progress_data)
179-
metrics = collect_metrics_for_enrollment(
180-
course_enrollment=self.course_enrollment,
181-
)
228+
course_sm = StudentModule.objects.filter(course_id=self.course_enrollment.course_id)
229+
metrics = collect_metrics_for_enrollment(site=self.site,
230+
course_enrollment=self.course_enrollment,
231+
course_sm=course_sm)
182232

183233
self.check_response_metrics(metrics, self.progress_data)
184234
assert metrics == lcgm
@@ -190,13 +240,15 @@ def test_no_update_no_lcgm_no_sm(self, monkeypatch):
190240
The function under test should return None
191241
"""
192242
monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
193-
lambda val: SiteFactory())
243+
lambda val: self.site)
194244
monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
195245
lambda val: self.progress_data)
196246
# Create a course enrollment for which we won't have student module records
197-
metrics = collect_metrics_for_enrollment(
198-
course_enrollment=CourseEnrollmentFactory(),
199-
)
247+
ce = CourseEnrollmentFactory()
248+
course_sm = StudentModule.objects.filter(course_id=ce.course_id)
249+
metrics = collect_metrics_for_enrollment(site=self.site,
250+
course_enrollment=ce,
251+
course_sm=course_sm)
200252
assert not metrics
201253

202254
def test_no_update_has_lcgm_no_sm(self, monkeypatch):
@@ -205,13 +257,16 @@ def test_no_update_has_lcgm_no_sm(self, monkeypatch):
205257
The function under test should return the existing LCGM
206258
"""
207259
monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
208-
lambda val: SiteFactory())
260+
lambda val: self.site)
209261
monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
210262
lambda val: self.progress_data)
211263
# Create a course enrollment for which we won't have student module records
212264
ce = CourseEnrollmentFactory()
213265
lcgm = LearnerCourseGradeMetricsFactory(course_id=ce.course_id, user=ce.user)
214-
metrics = collect_metrics_for_enrollment(course_enrollment=ce)
266+
course_sm = StudentModule.objects.filter(course_id=ce.course_id)
267+
metrics = collect_metrics_for_enrollment(site=self.site,
268+
course_enrollment=ce,
269+
course_sm=course_sm)
215270
assert metrics == lcgm
216271
assert not StudentModule.objects.filter(course_id=ce.course_id)
217272

0 commit comments

Comments
 (0)