Skip to content

Commit

Permalink
Added 'mau' collection to Figures pipeline
Browse files Browse the repository at this point in the history
The purpose of this is to improve API performance. See the previous
commit with SHA 92ea8c4 for an
explanation.

This commit adds the pipeline code to populate 'SiteDailyMetrics.mau'
with the latest numbers for the month as of yesterday

We also clean up the old live MAU collection from StudentModule,
removing dead coe and the tests that call the dead code
  • Loading branch information
johnbaldwin committed Jul 13, 2020
1 parent 92ea8c4 commit 21d88df
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 70 deletions.
34 changes: 34 additions & 0 deletions figures/mau.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,40 @@ def retrieve_live_course_mau_data(site, course_id):
)


def mau_1g_for_month_as_of_day(sm_queryset, date_for):
"""Get the MAU from the sm, as of the "date_for" for "date_for" month
sm_queryset is the StudentModule queryset for our source
This is a MAU 1G function that calculates the monthly active users as of the
day in the given month.
This function queries `courseware.models.StudentModule` to identify users
who are active in the site
Returns a queryset of distinct user ids
"""
month_sm = sm_queryset.filter(modified__year=date_for.year,
modified__month=date_for.month,
modified__day__lte=date_for.day)
return month_sm.values('student__id').distinct()


def site_mau_1g_for_month_as_of_day(site, date_for):
"""Get the MAU for the given site, as of the "date_for" in the month
This is a conenvience function. It gets the student modules for the site,
then calls
`figures.mau.mau_for_month_as_of_day(...)`
Returns a queryset with distinct user ids
"""
site_sm = get_student_modules_for_site(site)
return mau_1g_for_month_as_of_day(sm_queryset=site_sm,
date_for=date_for)


def store_mau_metrics(site, overwrite=False):
"""
Save "snapshot" of MAU metrics
Expand Down
21 changes: 4 additions & 17 deletions figures/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,10 @@ def get_site_mau_history_metrics(site, months_back):
month=str(rec.month_for.month).zfill(2))
history.append(dict(period=period, value=rec.active_user_count))

# Hack to set current month data in the history list
current_month_active = get_site_mau_current_month(site)
# Get our latest stored site MAU count
sdm = SiteDailyMetrics.latest_previous_record(site=site)
current_month_active = sdm.mau if sdm else 0

if history:
# reverse the list because it is currently in reverser chronological order
history.reverse()
Expand All @@ -266,21 +268,6 @@ def get_site_mau_history_metrics(site, months_back):
return dict(current_month=current_month_active, history=history)


def get_site_mau_current_month(site):
"""Specific function to get the live active users for the current month
Developers note: We're starting with the simple aproach for MAU 1G, first
generation. When we update for MAU 2G, we will be able to make the query
more efficient by pulling unique users from a single table used for live
capture.
"""
month_for = datetime.datetime.utcnow()
site_sm = figures.sites.get_student_modules_for_site(site)
curr_sm = site_sm.filter(modified__year=month_for.year,
modified__month=month_for.month)
return curr_sm.values('student__id').distinct().count()


def get_active_users_for_time_period(site, start_date, end_date, course_ids=None):
"""
Returns the number of users active in the time period.
Expand Down
5 changes: 5 additions & 0 deletions figures/pipeline/site_daily_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from django.db.models import Sum

from figures.helpers import as_course_key, as_datetime, next_day, prev_day
from figures.mau import site_mau_1g_for_month_as_of_day
from figures.models import CourseDailyMetrics, SiteDailyMetrics
from figures.sites import (
get_courses_for_site,
Expand Down Expand Up @@ -133,12 +134,15 @@ def extract(self, site, date_for=None, **kwargs): # pylint: disable=unused-argu

todays_active_users = get_site_active_users_for_date(site, date_for)
todays_active_user_count = todays_active_users.count()
mau = site_mau_1g_for_month_as_of_day(site, date_for)

data['todays_active_user_count'] = todays_active_user_count
data['cumulative_active_user_count'] = get_previous_cumulative_active_user_count(
site, date_for) + todays_active_user_count
data['total_user_count'] = user_count
data['course_count'] = course_count
data['total_enrollment_count'] = get_total_enrollment_count(site, date_for)
data['mau'] = mau.count()
return data


Expand Down Expand Up @@ -186,6 +190,7 @@ def load(self, site, date_for=None, force_update=False, **_kwargs):
total_user_count=data['total_user_count'],
course_count=data['course_count'],
total_enrollment_count=data['total_enrollment_count'],
mau=data['mau'],
)
)
return site_metrics, created
57 changes: 7 additions & 50 deletions tests/metrics/test_site_monthly_metrics.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,22 @@
"""
"""

from datetime import date, datetime
from factory import fuzzy
from datetime import date
from freezegun import freeze_time

from dateutil.rrule import rrule, MONTHLY
from dateutil.relativedelta import relativedelta

import pytest

from figures.metrics import (
get_site_mau_history_metrics,
get_site_mau_current_month
)
from figures.metrics import get_site_mau_history_metrics
from figures.models import SiteMonthlyMetrics

from tests.factories import (
CourseOverviewFactory,
OrganizationFactory,
OrganizationCourseFactory,
SiteDailyMetricsFactory,
SiteMonthlyMetricsFactory,
SiteFactory,
StudentModuleFactory,
UserFactory,
)
from tests.helpers import organizations_support_sites

if organizations_support_sites():
from tests.factories import UserOrganizationMappingFactory


@pytest.mark.django_db
Expand All @@ -45,6 +33,7 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
{'period': '2020/05', 'value': 11},
{'period': '2020/06', 'value': 12}]}
TODO: We want to revisit both this test and the function under test
"""
all_months_back = 12
months_back = 6
Expand All @@ -68,8 +57,9 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
active_user_count=counter))

current_month_active = 42
monkeypatch.setattr('figures.metrics.get_site_mau_current_month',
lambda n: current_month_active)
SiteDailyMetricsFactory(site=our_site,
date_for=mock_today - relativedelta(day=2),
mau=current_month_active)

data = get_site_mau_history_metrics(site=our_site, months_back=months_back)

Expand All @@ -82,36 +72,3 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
obj = SiteMonthlyMetrics.objects.get(site=our_site, month_for=month_for)
assert obj.active_user_count == rec['value']
assert obj.site == our_site


@pytest.mark.django_db
def test_get_site_mau_current_month(db):

mock_today = date(year=2020, month=3, day=1)
freezer = freeze_time(mock_today)
freezer.start()

start_dt = datetime(mock_today.year, mock_today.month, 1, tzinfo=fuzzy.compat.UTC)
end_dt = datetime(mock_today.year, mock_today.month, 31, tzinfo=fuzzy.compat.UTC)
date_gen = fuzzy.FuzzyDateTime(start_dt=start_dt, end_dt=end_dt)
site = SiteFactory()
course_overviews = [CourseOverviewFactory() for i in range(2)]
users = [UserFactory() for i in range(2)]
sm = []
for user in users:
for co in course_overviews:
sm.append(StudentModuleFactory(course_id=co.id,
student=user,
modified=date_gen.evaluate(2, None, False)))

if organizations_support_sites():
org = OrganizationFactory(sites=[site])
for co in course_overviews:
OrganizationCourseFactory(organization=org, course_id=str(co.id))
for user in users:
UserOrganizationMappingFactory(user=user,
organization=org)

active_user_count = get_site_mau_current_month(site)
freezer.stop()
assert active_user_count == len(users)
24 changes: 23 additions & 1 deletion tests/pipeline/test_site_daily_metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
'''Tests figures.pipeline.
'''Tests figures.pipeline.site_daily_metrics module
IMPORTANT: We need to refactor the test data in this test module as this was
early work and we've learned a lot since then.
TODO:
Expand Down Expand Up @@ -71,6 +74,7 @@
total_user_count=200,
course_count=len(CDM_INPUT_TEST_DATA),
total_enrollment_count=100,
mau=55,
)
]

Expand All @@ -81,6 +85,7 @@
total_user_count=200,
course_count=len(CDM_INPUT_TEST_DATA),
total_enrollment_count=150,
mau=56,
)


Expand Down Expand Up @@ -232,12 +237,15 @@ def setup(self, db):
organization=self.organization)

def test_extract(self, monkeypatch):
previous_cumulative_active_user_count = 50

expected_results = dict(
cumulative_active_user_count=52, # previous cumulative is 50
todays_active_user_count=2,
total_user_count=len(self.users),
course_count=len(CDM_INPUT_TEST_DATA),
total_enrollment_count=150,
mau=len(self.users), # expect 3
)

assert not StudentModule.objects.count()
Expand All @@ -253,6 +261,19 @@ def mock_student_modules_for_site(site):
monkeypatch.setattr(pipeline_sdm, 'get_student_modules_for_site',
mock_student_modules_for_site)

def mock_site_mau_1g_for_month_as_of_day(site, date_for):
return get_user_model().objects.filter(
id__in=[user.id for user in self.users]).values('id')

monkeypatch.setattr(pipeline_sdm, 'site_mau_1g_for_month_as_of_day',
mock_site_mau_1g_for_month_as_of_day)

def mock_get_previous_cumulative_active_user_count(site, date_for):
return previous_cumulative_active_user_count

monkeypatch.setattr(pipeline_sdm, 'get_previous_cumulative_active_user_count',
mock_get_previous_cumulative_active_user_count)

for course in figures.sites.get_courses_for_site(self.site):
assert course.created.date() < self.date_for
for user in figures.sites.get_users_for_site(self.site):
Expand All @@ -278,6 +299,7 @@ class TestSiteDailyMetricsLoader(object):
total_user_count=3,
course_count=4,
total_enrollment_count=5,
mau=6,
)

class MockExtractor(object):
Expand Down
76 changes: 76 additions & 0 deletions tests/test_mau.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@

from datetime import datetime
from freezegun import freeze_time
from dateutil.relativedelta import relativedelta
import pytest

from django.utils.timezone import utc

from courseware.models import StudentModule

Expand All @@ -11,9 +15,13 @@
from figures.mau import (
get_mau_from_student_modules,
get_mau_from_site_course,
mau_1g_for_month_as_of_day,
site_mau_1g_for_month_as_of_day,
store_mau_metrics,
)

from tests.factories import StudentModuleFactory


def test_get_mau_from_site_course(sm_test_data):
"""Basic test for coverage with simple check
Expand Down Expand Up @@ -47,6 +55,74 @@ def test_get_mau_from_sm_for_site(sm_test_data):
assert set(users) == set(sm_check)


@pytest.mark.django_db
def test_mau_1g_for_month_as_of_day_first_day_next_month(db):
"""
Test getting live MAU 1G values from StudentModule for the given day
Quick-n-dirty data setup:
We want to make sure we get the right records when the query happens on the
first day of the next month. So we do the following
* Add a StudentModule record for two months before
* Add at least one StudentModule record for the month we want
* Add at least one StudentModule record for after the month we want
This sets up the scenario that we run the daily pipeline to capture MAU
"as of" yesterday (the last day of the previous month) to capture MAU for
the previous month
"""
mock_today = datetime(year=2020, month=4, day=1).replace(tzinfo=utc)
month_before = datetime(year=2020, month=2, day=2).replace(tzinfo=utc)
in_dates = [datetime(year=2020, month=3, day=1).replace(tzinfo=utc),
datetime(year=2020, month=3, day=15).replace(tzinfo=utc),
datetime(year=2020, month=3, day=31).replace(tzinfo=utc)]
date_for = mock_today.date() - relativedelta(days=1)

# Create a student module in the month before, and in month after
StudentModuleFactory(created=month_before, modified=month_before)
StudentModuleFactory(created=mock_today, modified=mock_today)
sm_in = [StudentModuleFactory(created=rec,
modified=rec) for rec in in_dates]
expected_user_ids = [obj.student_id for obj in sm_in]

sm_queryset = StudentModule.objects.all()
user_ids = mau_1g_for_month_as_of_day(sm_queryset=sm_queryset,
date_for=date_for)
assert set([rec['student__id'] for rec in user_ids]) == set(expected_user_ids)


def test_site_mau_1g_for_month_as_of_day(monkeypatch):
"""Test our wrapper function, site_mau_1g_for_month_as_of_day
All we really care about is the call stack is what we expect with the args
we expect
"""
expected_site = 'this is our site'
expected_date_for = 'this is my date'
expected_sm_queryset = 'this is my expected student module queryset'
expected_user_id_qs = 'this is my expected user id queryset'

def mock_get_student_modules_for_site(site):
assert site == expected_site
return expected_sm_queryset

def mock_mau_1g_for_month_as_of_day(sm_queryset, date_for):
assert date_for == expected_date_for
assert sm_queryset == expected_sm_queryset
return expected_user_id_qs

monkeypatch.setattr('figures.mau.mau_1g_for_month_as_of_day',
mock_mau_1g_for_month_as_of_day)
monkeypatch.setattr('figures.mau.get_student_modules_for_site',
mock_get_student_modules_for_site)

qs = site_mau_1g_for_month_as_of_day(site=expected_site,
date_for=expected_date_for)
assert qs == expected_user_id_qs


def test_store_mau_metrics(monkeypatch, sm_test_data):
"""
Basic minimal test
Expand Down
4 changes: 2 additions & 2 deletions tests/views/test_site_daily_metrics_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,13 @@ def test_create(self):
Note: We don't need write functionality with this view as of version 0.2.0
"""
data = dict(
# site=SiteSerializer(self.site).data,
date_for='2020-01-01',
cumulative_active_user_count=1,
todays_active_user_count=2,
total_user_count=3,
course_count=4,
total_enrollment_count=5
total_enrollment_count=5,
mau=6,
)
# Might not need to set format='json'
request = APIRequestFactory().post(
Expand Down

0 comments on commit 21d88df

Please sign in to comment.