From 746e4fed1bfeb731b2f6288fe3ba4ea056397fdb Mon Sep 17 00:00:00 2001 From: Kaustav Banerjee Date: Mon, 8 Aug 2022 21:08:36 +0530 Subject: [PATCH] feat: allow switching anonymous user ID hashing algorithm from shake to md5 The hashing algorithm has been changed in cd60646. However, there are Open edX operators who maintain backward compatibility of anonymous user IDs after past rotations of their Django secret key. For them, altering the hashing algorithm was a breaking change that made their analytics inconsistent. --- cms/envs/common.py | 11 +++++++++++ common/djangoapps/student/models.py | 14 ++++++++++++-- common/djangoapps/student/tests/tests.py | 11 +++++++++++ lms/envs/common.py | 11 +++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/cms/envs/common.py b/cms/envs/common.py index f7f8a2fc208a..530850f64e5b 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -516,6 +516,17 @@ # in the LMS and CMS. # .. toggle_tickets: 'https://github.com/open-craft/edx-platform/pull/429' 'DISABLE_UNENROLLMENT': False, + + # .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID'] + # .. toggle_implementation: DjangoSetting + # .. toggle_default: False + # .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user id + # instead of the newer SHAKE128 hashing algorithm + # .. toggle_use_cases: open_edx + # .. toggle_creation_date: 2022-08-08 + # .. toggle_target_removal_date: None + # .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832' + 'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False, } # .. toggle_name: ENABLE_COPPA_COMPLIANCE diff --git a/common/djangoapps/student/models.py b/common/djangoapps/student/models.py index 50521cafaf03..51ec2208a573 100644 --- a/common/djangoapps/student/models.py +++ b/common/djangoapps/student/models.py @@ -215,12 +215,22 @@ def anonymous_id_for_user(user, course_id): # function: Rotate at will, since the hashes are stored and # will not change. # include the secret key as a salt, and to make the ids unique across different LMS installs. - hasher = hashlib.shake_128() + legacy_hash_enabled = settings.FEATURES.get('ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID', False) + if legacy_hash_enabled: + # Use legacy MD5 algorithm if flag enabled + hasher = hashlib.md5() + else: + hasher = hashlib.shake_128() + hasher.update(settings.SECRET_KEY.encode('utf8')) hasher.update(str(user.id).encode('utf8')) if course_id: hasher.update(str(course_id).encode('utf-8')) - anonymous_user_id = hasher.hexdigest(16) # pylint: disable=too-many-function-args + + if legacy_hash_enabled: + anonymous_user_id = hasher.hexdigest() + else: + anonymous_user_id = hasher.hexdigest(16) # pylint: disable=too-many-function-args try: AnonymousUserId.objects.create( diff --git a/common/djangoapps/student/tests/tests.py b/common/djangoapps/student/tests/tests.py index a2e4c8a5decc..8d490712fdd3 100644 --- a/common/djangoapps/student/tests/tests.py +++ b/common/djangoapps/student/tests/tests.py @@ -1057,6 +1057,17 @@ def test_anonymous_id_secret_key_changes_result_in_diff_values_for_same_new_user assert anonymous_id != new_anonymous_id assert self.user == user_by_anonymous_id(new_anonymous_id) + def test_enable_legacy_hash_flag(self): + """Test that different anonymous id returned if ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID enabled.""" + CourseEnrollment.enroll(self.user, self.course.id) + anonymous_id = anonymous_id_for_user(self.user, self.course.id) + with patch.dict(settings.FEATURES, ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID=True): + # Recreate user object to clear cached anonymous id. + self.user = User.objects.get(pk=self.user.id) + AnonymousUserId.objects.filter(user=self.user).filter(course_id=self.course.id).delete() + new_anonymous_id = anonymous_id_for_user(self.user, self.course.id) + assert anonymous_id != new_anonymous_id + @skip_unless_lms @patch('openedx.core.djangoapps.programs.utils.get_programs') diff --git a/lms/envs/common.py b/lms/envs/common.py index c05f7caac332..aa26fc1d8517 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -1018,6 +1018,17 @@ # .. toggle_target_removal_date: None # .. toggle_tickets: 'https://openedx.atlassian.net/browse/MST-1458' 'ENABLE_CERTIFICATES_IDV_REQUIREMENT': False, + + # .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID'] + # .. toggle_implementation: DjangoSetting + # .. toggle_default: False + # .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user id + # instead of the newer SHAKE128 hashing algorithm + # .. toggle_use_cases: open_edx + # .. toggle_creation_date: 2022-08-08 + # .. toggle_target_removal_date: None + # .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832' + 'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False, } # Specifies extra XBlock fields that should available when requested via the Course Blocks API