Skip to content

Commit 21d88df

Browse files
committed
Added 'mau' collection to Figures pipeline
The purpose of this is to improve API performance. See the previous commit with SHA 92ea8c4 for an explanation. This commit adds the pipeline code to populate 'SiteDailyMetrics.mau' with the latest numbers for the month as of yesterday. We also clean up the old live MAU collection from StudentModule, removing dead code and the tests that call the dead code.
1 parent 92ea8c4 commit 21d88df

File tree

7 files changed

+151
-70
lines changed

7 files changed

+151
-70
lines changed

figures/mau.py

+34
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,40 @@ def retrieve_live_course_mau_data(site, course_id):
7373
)
7474

7575

76+
def mau_1g_for_month_as_of_day(sm_queryset, date_for):
77+
"""Get the MAU from the sm, as of the "date_for" for "date_for" month
78+
79+
sm_queryset is the StudentModule queryset for our source
80+
81+
This is a MAU 1G function that calculates the monthly active users as of the
82+
day in the given month.
83+
84+
This function queries `courseware.models.StudentModule` to identify users
85+
who are active in the site
86+
87+
Returns a queryset of distinct user ids
88+
"""
89+
month_sm = sm_queryset.filter(modified__year=date_for.year,
90+
modified__month=date_for.month,
91+
modified__day__lte=date_for.day)
92+
return month_sm.values('student__id').distinct()
93+
94+
95+
def site_mau_1g_for_month_as_of_day(site, date_for):
96+
"""Get the MAU for the given site, as of the "date_for" in the month
97+
98+
This is a convenience function. It gets the student modules for the site,
99+
then calls
100+
101+
`figures.mau.mau_1g_for_month_as_of_day(...)`
102+
103+
Returns a queryset with distinct user ids
104+
"""
105+
site_sm = get_student_modules_for_site(site)
106+
return mau_1g_for_month_as_of_day(sm_queryset=site_sm,
107+
date_for=date_for)
108+
109+
76110
def store_mau_metrics(site, overwrite=False):
77111
"""
78112
Save "snapshot" of MAU metrics

figures/metrics.py

+4-17
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,10 @@ def get_site_mau_history_metrics(site, months_back):
253253
month=str(rec.month_for.month).zfill(2))
254254
history.append(dict(period=period, value=rec.active_user_count))
255255

256-
# Hack to set current month data in the history list
257-
current_month_active = get_site_mau_current_month(site)
256+
# Get our latest stored site MAU count
257+
sdm = SiteDailyMetrics.latest_previous_record(site=site)
258+
current_month_active = sdm.mau if sdm else 0
259+
258260
if history:
259261
# reverse the list because it is currently in reverse chronological order
260262
history.reverse()
@@ -266,21 +268,6 @@ def get_site_mau_history_metrics(site, months_back):
266268
return dict(current_month=current_month_active, history=history)
267269

268270

269-
def get_site_mau_current_month(site):
270-
"""Specific function to get the live active users for the current month
271-
272-
Developers note: We're starting with the simple aproach for MAU 1G, first
273-
generation. When we update for MAU 2G, we will be able to make the query
274-
more efficient by pulling unique users from a single table used for live
275-
capture.
276-
"""
277-
month_for = datetime.datetime.utcnow()
278-
site_sm = figures.sites.get_student_modules_for_site(site)
279-
curr_sm = site_sm.filter(modified__year=month_for.year,
280-
modified__month=month_for.month)
281-
return curr_sm.values('student__id').distinct().count()
282-
283-
284271
def get_active_users_for_time_period(site, start_date, end_date, course_ids=None):
285272
"""
286273
Returns the number of users active in the time period.

figures/pipeline/site_daily_metrics.py

+5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from django.db.models import Sum
1313

1414
from figures.helpers import as_course_key, as_datetime, next_day, prev_day
15+
from figures.mau import site_mau_1g_for_month_as_of_day
1516
from figures.models import CourseDailyMetrics, SiteDailyMetrics
1617
from figures.sites import (
1718
get_courses_for_site,
@@ -133,12 +134,15 @@ def extract(self, site, date_for=None, **kwargs): # pylint: disable=unused-argu
133134

134135
todays_active_users = get_site_active_users_for_date(site, date_for)
135136
todays_active_user_count = todays_active_users.count()
137+
mau = site_mau_1g_for_month_as_of_day(site, date_for)
138+
136139
data['todays_active_user_count'] = todays_active_user_count
137140
data['cumulative_active_user_count'] = get_previous_cumulative_active_user_count(
138141
site, date_for) + todays_active_user_count
139142
data['total_user_count'] = user_count
140143
data['course_count'] = course_count
141144
data['total_enrollment_count'] = get_total_enrollment_count(site, date_for)
145+
data['mau'] = mau.count()
142146
return data
143147

144148

@@ -186,6 +190,7 @@ def load(self, site, date_for=None, force_update=False, **_kwargs):
186190
total_user_count=data['total_user_count'],
187191
course_count=data['course_count'],
188192
total_enrollment_count=data['total_enrollment_count'],
193+
mau=data['mau'],
189194
)
190195
)
191196
return site_metrics, created
+7-50
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,22 @@
11
"""
22
"""
33

4-
from datetime import date, datetime
5-
from factory import fuzzy
4+
from datetime import date
65
from freezegun import freeze_time
76

87
from dateutil.rrule import rrule, MONTHLY
98
from dateutil.relativedelta import relativedelta
109

1110
import pytest
1211

13-
from figures.metrics import (
14-
get_site_mau_history_metrics,
15-
get_site_mau_current_month
16-
)
12+
from figures.metrics import get_site_mau_history_metrics
1713
from figures.models import SiteMonthlyMetrics
1814

1915
from tests.factories import (
20-
CourseOverviewFactory,
21-
OrganizationFactory,
22-
OrganizationCourseFactory,
16+
SiteDailyMetricsFactory,
2317
SiteMonthlyMetricsFactory,
2418
SiteFactory,
25-
StudentModuleFactory,
26-
UserFactory,
2719
)
28-
from tests.helpers import organizations_support_sites
29-
30-
if organizations_support_sites():
31-
from tests.factories import UserOrganizationMappingFactory
3220

3321

3422
@pytest.mark.django_db
@@ -45,6 +33,7 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
4533
{'period': '2020/05', 'value': 11},
4634
{'period': '2020/06', 'value': 12}]}
4735
36+
TODO: We want to revisit both this test and the function under test
4837
"""
4938
all_months_back = 12
5039
months_back = 6
@@ -68,8 +57,9 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
6857
active_user_count=counter))
6958

7059
current_month_active = 42
71-
monkeypatch.setattr('figures.metrics.get_site_mau_current_month',
72-
lambda n: current_month_active)
60+
SiteDailyMetricsFactory(site=our_site,
61+
date_for=mock_today - relativedelta(day=2),
62+
mau=current_month_active)
7363

7464
data = get_site_mau_history_metrics(site=our_site, months_back=months_back)
7565

@@ -82,36 +72,3 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
8272
obj = SiteMonthlyMetrics.objects.get(site=our_site, month_for=month_for)
8373
assert obj.active_user_count == rec['value']
8474
assert obj.site == our_site
85-
86-
87-
@pytest.mark.django_db
88-
def test_get_site_mau_current_month(db):
89-
90-
mock_today = date(year=2020, month=3, day=1)
91-
freezer = freeze_time(mock_today)
92-
freezer.start()
93-
94-
start_dt = datetime(mock_today.year, mock_today.month, 1, tzinfo=fuzzy.compat.UTC)
95-
end_dt = datetime(mock_today.year, mock_today.month, 31, tzinfo=fuzzy.compat.UTC)
96-
date_gen = fuzzy.FuzzyDateTime(start_dt=start_dt, end_dt=end_dt)
97-
site = SiteFactory()
98-
course_overviews = [CourseOverviewFactory() for i in range(2)]
99-
users = [UserFactory() for i in range(2)]
100-
sm = []
101-
for user in users:
102-
for co in course_overviews:
103-
sm.append(StudentModuleFactory(course_id=co.id,
104-
student=user,
105-
modified=date_gen.evaluate(2, None, False)))
106-
107-
if organizations_support_sites():
108-
org = OrganizationFactory(sites=[site])
109-
for co in course_overviews:
110-
OrganizationCourseFactory(organization=org, course_id=str(co.id))
111-
for user in users:
112-
UserOrganizationMappingFactory(user=user,
113-
organization=org)
114-
115-
active_user_count = get_site_mau_current_month(site)
116-
freezer.stop()
117-
assert active_user_count == len(users)

tests/pipeline/test_site_daily_metrics.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
'''Tests figures.pipeline.
1+
'''Tests figures.pipeline.site_daily_metrics module
2+
3+
IMPORTANT: We need to refactor the test data in this test module as this was
4+
early work and we've learned a lot since then.
25
36
TODO:
47
@@ -71,6 +74,7 @@
7174
total_user_count=200,
7275
course_count=len(CDM_INPUT_TEST_DATA),
7376
total_enrollment_count=100,
77+
mau=55,
7478
)
7579
]
7680

@@ -81,6 +85,7 @@
8185
total_user_count=200,
8286
course_count=len(CDM_INPUT_TEST_DATA),
8387
total_enrollment_count=150,
88+
mau=56,
8489
)
8590

8691

@@ -232,12 +237,15 @@ def setup(self, db):
232237
organization=self.organization)
233238

234239
def test_extract(self, monkeypatch):
240+
previous_cumulative_active_user_count = 50
241+
235242
expected_results = dict(
236243
cumulative_active_user_count=52, # previous cumulative is 50
237244
todays_active_user_count=2,
238245
total_user_count=len(self.users),
239246
course_count=len(CDM_INPUT_TEST_DATA),
240247
total_enrollment_count=150,
248+
mau=len(self.users), # expect 3
241249
)
242250

243251
assert not StudentModule.objects.count()
@@ -253,6 +261,19 @@ def mock_student_modules_for_site(site):
253261
monkeypatch.setattr(pipeline_sdm, 'get_student_modules_for_site',
254262
mock_student_modules_for_site)
255263

264+
def mock_site_mau_1g_for_month_as_of_day(site, date_for):
265+
return get_user_model().objects.filter(
266+
id__in=[user.id for user in self.users]).values('id')
267+
268+
monkeypatch.setattr(pipeline_sdm, 'site_mau_1g_for_month_as_of_day',
269+
mock_site_mau_1g_for_month_as_of_day)
270+
271+
def mock_get_previous_cumulative_active_user_count(site, date_for):
272+
return previous_cumulative_active_user_count
273+
274+
monkeypatch.setattr(pipeline_sdm, 'get_previous_cumulative_active_user_count',
275+
mock_get_previous_cumulative_active_user_count)
276+
256277
for course in figures.sites.get_courses_for_site(self.site):
257278
assert course.created.date() < self.date_for
258279
for user in figures.sites.get_users_for_site(self.site):
@@ -278,6 +299,7 @@ class TestSiteDailyMetricsLoader(object):
278299
total_user_count=3,
279300
course_count=4,
280301
total_enrollment_count=5,
302+
mau=6,
281303
)
282304

283305
class MockExtractor(object):

tests/test_mau.py

+76
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11

22
from datetime import datetime
33
from freezegun import freeze_time
4+
from dateutil.relativedelta import relativedelta
5+
import pytest
6+
7+
from django.utils.timezone import utc
48

59
from courseware.models import StudentModule
610

@@ -11,9 +15,13 @@
1115
from figures.mau import (
1216
get_mau_from_student_modules,
1317
get_mau_from_site_course,
18+
mau_1g_for_month_as_of_day,
19+
site_mau_1g_for_month_as_of_day,
1420
store_mau_metrics,
1521
)
1622

23+
from tests.factories import StudentModuleFactory
24+
1725

1826
def test_get_mau_from_site_course(sm_test_data):
1927
"""Basic test for coverage with simple check
@@ -47,6 +55,74 @@ def test_get_mau_from_sm_for_site(sm_test_data):
4755
assert set(users) == set(sm_check)
4856

4957

58+
@pytest.mark.django_db
59+
def test_mau_1g_for_month_as_of_day_first_day_next_month(db):
60+
"""
61+
Test getting live MAU 1G values from StudentModule for the given day
62+
63+
Quick-n-dirty data setup:
64+
65+
We want to make sure we get the right records when the query happens on the
66+
first day of the next month. So we do the following
67+
68+
* Add a StudentModule record for two months before
69+
* Add at least one StudentModule record for the month we want
70+
* Add at least one StudentModule record for after the month we want
71+
72+
This sets up the scenario that we run the daily pipeline to capture MAU
73+
"as of" yesterday (the last day of the previous month) to capture MAU for
74+
the previous month
75+
"""
76+
mock_today = datetime(year=2020, month=4, day=1).replace(tzinfo=utc)
77+
month_before = datetime(year=2020, month=2, day=2).replace(tzinfo=utc)
78+
in_dates = [datetime(year=2020, month=3, day=1).replace(tzinfo=utc),
79+
datetime(year=2020, month=3, day=15).replace(tzinfo=utc),
80+
datetime(year=2020, month=3, day=31).replace(tzinfo=utc)]
81+
date_for = mock_today.date() - relativedelta(days=1)
82+
83+
# Create a student module in the month before, and in month after
84+
StudentModuleFactory(created=month_before, modified=month_before)
85+
StudentModuleFactory(created=mock_today, modified=mock_today)
86+
sm_in = [StudentModuleFactory(created=rec,
87+
modified=rec) for rec in in_dates]
88+
expected_user_ids = [obj.student_id for obj in sm_in]
89+
90+
sm_queryset = StudentModule.objects.all()
91+
user_ids = mau_1g_for_month_as_of_day(sm_queryset=sm_queryset,
92+
date_for=date_for)
93+
assert set([rec['student__id'] for rec in user_ids]) == set(expected_user_ids)
94+
95+
96+
def test_site_mau_1g_for_month_as_of_day(monkeypatch):
97+
"""Test our wrapper function, site_mau_1g_for_month_as_of_day
98+
99+
All we really care about is that the call stack is what we expect with the args
100+
we expect
101+
"""
102+
expected_site = 'this is our site'
103+
expected_date_for = 'this is my date'
104+
expected_sm_queryset = 'this is my expected student module queryset'
105+
expected_user_id_qs = 'this is my expected user id queryset'
106+
107+
def mock_get_student_modules_for_site(site):
108+
assert site == expected_site
109+
return expected_sm_queryset
110+
111+
def mock_mau_1g_for_month_as_of_day(sm_queryset, date_for):
112+
assert date_for == expected_date_for
113+
assert sm_queryset == expected_sm_queryset
114+
return expected_user_id_qs
115+
116+
monkeypatch.setattr('figures.mau.mau_1g_for_month_as_of_day',
117+
mock_mau_1g_for_month_as_of_day)
118+
monkeypatch.setattr('figures.mau.get_student_modules_for_site',
119+
mock_get_student_modules_for_site)
120+
121+
qs = site_mau_1g_for_month_as_of_day(site=expected_site,
122+
date_for=expected_date_for)
123+
assert qs == expected_user_id_qs
124+
125+
50126
def test_store_mau_metrics(monkeypatch, sm_test_data):
51127
"""
52128
Basic minimal test

tests/views/test_site_daily_metrics_view.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,13 @@ def test_create(self):
123123
Note: We don't need write functionality with this view as of version 0.2.0
124124
"""
125125
data = dict(
126-
# site=SiteSerializer(self.site).data,
127126
date_for='2020-01-01',
128127
cumulative_active_user_count=1,
129128
todays_active_user_count=2,
130129
total_user_count=3,
131130
course_count=4,
132-
total_enrollment_count=5
131+
total_enrollment_count=5,
132+
mau=6,
133133
)
134134
# Might not need to set format='json'
135135
request = APIRequestFactory().post(

0 commit comments

Comments
 (0)