Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Performance for monthly active users API endpoint #234

Merged
merged 2 commits into from
Jul 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions figures/mau.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,40 @@ def retrieve_live_course_mau_data(site, course_id):
)


def mau_1g_for_month_as_of_day(sm_queryset, date_for):
"""Get the MAU from the sm, as of the "date_for" for "date_for" month

sm_queryset is the StudentModule queryset for our source

This is a MAU 1G function that calculates the monthly active users as of the
day in the given month.

This function queries `courseware.models.StudentModule` to identify users
who are active in the site

Returns a queryset of distinct user ids
"""
month_sm = sm_queryset.filter(modified__year=date_for.year,
modified__month=date_for.month,
modified__day__lte=date_for.day)
return month_sm.values('student__id').distinct()


def site_mau_1g_for_month_as_of_day(site, date_for):
"""Get the MAU for the given site, as of the "date_for" in the month

This is a conenvience function. It gets the student modules for the site,
then calls

`figures.mau.mau_for_month_as_of_day(...)`

Returns a queryset with distinct user ids
"""
site_sm = get_student_modules_for_site(site)
return mau_1g_for_month_as_of_day(sm_queryset=site_sm,
date_for=date_for)


def store_mau_metrics(site, overwrite=False):
"""
Save "snapshot" of MAU metrics
Expand Down
21 changes: 4 additions & 17 deletions figures/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,10 @@ def get_site_mau_history_metrics(site, months_back):
month=str(rec.month_for.month).zfill(2))
history.append(dict(period=period, value=rec.active_user_count))

# Hack to set current month data in the history list
current_month_active = get_site_mau_current_month(site)
# Get our latest stored site MAU count
sdm = SiteDailyMetrics.latest_previous_record(site=site)
current_month_active = sdm.mau if sdm else 0

if history:
# reverse the list because it is currently in reverser chronological order
history.reverse()
Expand All @@ -266,21 +268,6 @@ def get_site_mau_history_metrics(site, months_back):
return dict(current_month=current_month_active, history=history)


def get_site_mau_current_month(site):
"""Specific function to get the live active users for the current month

Developers note: We're starting with the simple aproach for MAU 1G, first
generation. When we update for MAU 2G, we will be able to make the query
more efficient by pulling unique users from a single table used for live
capture.
"""
month_for = datetime.datetime.utcnow()
site_sm = figures.sites.get_student_modules_for_site(site)
curr_sm = site_sm.filter(modified__year=month_for.year,
modified__month=month_for.month)
return curr_sm.values('student__id').distinct().count()


def get_active_users_for_time_period(site, start_date, end_date, course_ids=None):
"""
Returns the number of users active in the time period.
Expand Down
20 changes: 20 additions & 0 deletions figures/migrations/0011_add_mau_to_site_daily_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.15 on 2020-07-10 19:06
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('figures', '0010_site_monthly_metrics'),
]

operations = [
migrations.AddField(
model_name='sitedailymetrics',
name='mau',
field=models.IntegerField(blank=True, null=True),
),
]
10 changes: 9 additions & 1 deletion figures/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,23 @@ def latest_previous_record(cls, site, course_id, date_for=None):
class SiteDailyMetrics(TimeStampedModel):
"""
Stores metrics for a given site and day

When we upgrade to MAU 2G, we'll add a new field, 'active_users_today'
and pull the data from the previous day's live active user capture
"""

site = models.ForeignKey(Site)
# Date for which this record's data are collected
date_for = models.DateField()

# Under review for deletion
cumulative_active_user_count = models.IntegerField(blank=True, null=True)

todays_active_user_count = models.IntegerField(blank=True, null=True)
total_user_count = models.IntegerField()
course_count = models.IntegerField()
total_enrollment_count = models.IntegerField()
mau = models.IntegerField(blank=True, null=True)

class Meta:
"""
Expand Down Expand Up @@ -129,7 +136,8 @@ def latest_previous_record(cls, site, date_for=None):

if date_for:
filter_args['date_for__lt'] = date_for
return cls.objects.filter(**filter_args).order_by('-date_for').first()
recs = cls.objects.filter(**filter_args).order_by('-date_for')
return recs[0] if recs else None


@python_2_unicode_compatible
Expand Down
5 changes: 5 additions & 0 deletions figures/pipeline/site_daily_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from django.db.models import Sum

from figures.helpers import as_course_key, as_datetime, next_day, prev_day
from figures.mau import site_mau_1g_for_month_as_of_day
from figures.models import CourseDailyMetrics, SiteDailyMetrics
from figures.sites import (
get_courses_for_site,
Expand Down Expand Up @@ -133,12 +134,15 @@ def extract(self, site, date_for=None, **kwargs): # pylint: disable=unused-argu

todays_active_users = get_site_active_users_for_date(site, date_for)
todays_active_user_count = todays_active_users.count()
mau = site_mau_1g_for_month_as_of_day(site, date_for)

data['todays_active_user_count'] = todays_active_user_count
data['cumulative_active_user_count'] = get_previous_cumulative_active_user_count(
site, date_for) + todays_active_user_count
data['total_user_count'] = user_count
data['course_count'] = course_count
data['total_enrollment_count'] = get_total_enrollment_count(site, date_for)
data['mau'] = mau.count()
return data


Expand Down Expand Up @@ -186,6 +190,7 @@ def load(self, site, date_for=None, force_update=False, **_kwargs):
total_user_count=data['total_user_count'],
course_count=data['course_count'],
total_enrollment_count=data['total_enrollment_count'],
mau=data['mau'],
)
)
return site_metrics, created
1 change: 1 addition & 0 deletions tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ class Meta:
total_user_count = factory.Sequence(lambda n: n)
course_count = factory.Sequence(lambda n: n)
total_enrollment_count = factory.Sequence(lambda n: n)
mau = factory.Sequence(lambda n: n)


class SiteMonthlyMetricsFactory(DjangoModelFactory):
Expand Down
57 changes: 7 additions & 50 deletions tests/metrics/test_site_monthly_metrics.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,22 @@
"""
"""

from datetime import date, datetime
from factory import fuzzy
from datetime import date
from freezegun import freeze_time

from dateutil.rrule import rrule, MONTHLY
from dateutil.relativedelta import relativedelta

import pytest

from figures.metrics import (
get_site_mau_history_metrics,
get_site_mau_current_month
)
from figures.metrics import get_site_mau_history_metrics
from figures.models import SiteMonthlyMetrics

from tests.factories import (
CourseOverviewFactory,
OrganizationFactory,
OrganizationCourseFactory,
SiteDailyMetricsFactory,
SiteMonthlyMetricsFactory,
SiteFactory,
StudentModuleFactory,
UserFactory,
)
from tests.helpers import organizations_support_sites

if organizations_support_sites():
from tests.factories import UserOrganizationMappingFactory


@pytest.mark.django_db
Expand All @@ -45,6 +33,7 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
{'period': '2020/05', 'value': 11},
{'period': '2020/06', 'value': 12}]}

TODO: We want to revisit both this test and the function under test
"""
all_months_back = 12
months_back = 6
Expand All @@ -68,8 +57,9 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
active_user_count=counter))

current_month_active = 42
monkeypatch.setattr('figures.metrics.get_site_mau_current_month',
lambda n: current_month_active)
SiteDailyMetricsFactory(site=our_site,
date_for=mock_today - relativedelta(day=2),
mau=current_month_active)

data = get_site_mau_history_metrics(site=our_site, months_back=months_back)

Expand All @@ -82,36 +72,3 @@ def test_get_site_mau_history_metrics_basic(db, monkeypatch):
obj = SiteMonthlyMetrics.objects.get(site=our_site, month_for=month_for)
assert obj.active_user_count == rec['value']
assert obj.site == our_site


@pytest.mark.django_db
def test_get_site_mau_current_month(db):

mock_today = date(year=2020, month=3, day=1)
freezer = freeze_time(mock_today)
freezer.start()

start_dt = datetime(mock_today.year, mock_today.month, 1, tzinfo=fuzzy.compat.UTC)
end_dt = datetime(mock_today.year, mock_today.month, 31, tzinfo=fuzzy.compat.UTC)
date_gen = fuzzy.FuzzyDateTime(start_dt=start_dt, end_dt=end_dt)
site = SiteFactory()
course_overviews = [CourseOverviewFactory() for i in range(2)]
users = [UserFactory() for i in range(2)]
sm = []
for user in users:
for co in course_overviews:
sm.append(StudentModuleFactory(course_id=co.id,
student=user,
modified=date_gen.evaluate(2, None, False)))

if organizations_support_sites():
org = OrganizationFactory(sites=[site])
for co in course_overviews:
OrganizationCourseFactory(organization=org, course_id=str(co.id))
for user in users:
UserOrganizationMappingFactory(user=user,
organization=org)

active_user_count = get_site_mau_current_month(site)
freezer.stop()
assert active_user_count == len(users)
6 changes: 4 additions & 2 deletions tests/models/test_site_daily_metrics_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def test_create(self):
cumulative_active_user_count=11,
total_user_count=1,
course_count=1,
total_enrollment_count=1
total_enrollment_count=1,
mau=1,
),
)
site_metrics, created = SiteDailyMetrics.objects.get_or_create(**rec)
Expand All @@ -58,7 +59,8 @@ def test_multiple_sites(self):
cumulative_active_user_count=11,
total_user_count=1,
course_count=1,
total_enrollment_count=1
total_enrollment_count=1,
mau=1,
)
obj = SiteDailyMetrics.objects.create(**rec)
assert obj.site == default_site
Expand Down
24 changes: 23 additions & 1 deletion tests/pipeline/test_site_daily_metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
'''Tests figures.pipeline.
'''Tests figures.pipeline.site_daily_metrics module

IMPORTANT: We need to refactor the test data in this test module as this was
early work and we've learned a lot since then.

TODO:

Expand Down Expand Up @@ -71,6 +74,7 @@
total_user_count=200,
course_count=len(CDM_INPUT_TEST_DATA),
total_enrollment_count=100,
mau=55,
)
]

Expand All @@ -81,6 +85,7 @@
total_user_count=200,
course_count=len(CDM_INPUT_TEST_DATA),
total_enrollment_count=150,
mau=56,
)


Expand Down Expand Up @@ -232,12 +237,15 @@ def setup(self, db):
organization=self.organization)

def test_extract(self, monkeypatch):
previous_cumulative_active_user_count = 50

expected_results = dict(
cumulative_active_user_count=52, # previous cumulative is 50
todays_active_user_count=2,
total_user_count=len(self.users),
course_count=len(CDM_INPUT_TEST_DATA),
total_enrollment_count=150,
mau=len(self.users), # expect 3
)

assert not StudentModule.objects.count()
Expand All @@ -253,6 +261,19 @@ def mock_student_modules_for_site(site):
monkeypatch.setattr(pipeline_sdm, 'get_student_modules_for_site',
mock_student_modules_for_site)

def mock_site_mau_1g_for_month_as_of_day(site, date_for):
return get_user_model().objects.filter(
id__in=[user.id for user in self.users]).values('id')

monkeypatch.setattr(pipeline_sdm, 'site_mau_1g_for_month_as_of_day',
mock_site_mau_1g_for_month_as_of_day)

def mock_get_previous_cumulative_active_user_count(site, date_for):
return previous_cumulative_active_user_count

monkeypatch.setattr(pipeline_sdm, 'get_previous_cumulative_active_user_count',
mock_get_previous_cumulative_active_user_count)

for course in figures.sites.get_courses_for_site(self.site):
assert course.created.date() < self.date_for
for user in figures.sites.get_users_for_site(self.site):
Expand All @@ -278,6 +299,7 @@ class TestSiteDailyMetricsLoader(object):
total_user_count=3,
course_count=4,
total_enrollment_count=5,
mau=6,
)

class MockExtractor(object):
Expand Down
Loading