From 6da7f588ddf0c851f49ab2f3829417fb7aec1718 Mon Sep 17 00:00:00 2001 From: Kaustav Banerjee Date: Mon, 8 Aug 2022 21:08:36 +0530 Subject: [PATCH] feat: allow switching anonymous user ID hashing algorithm from shake to md5 The hashing algorithm has been changed in cd60646. However, there are Open edX operators who maintain backward compatibility of anonymous user IDs after past rotations of their Django secret key. For them, altering the hashing algorithm was a breaking change that made their analytics inconsistent. (cherry picked from commit 746e4fed1bfeb731b2f6288fe3ba4ea056397fdb) (cherry picked from commit ff6d92fd892dff5e66bbf77eadf9b91ed2189a24) (cherry picked from commit 7245bdc7f73d9693f15acc885bee4e0d00be8245) --- cms/envs/common.py | 11 +++++++++++ common/djangoapps/student/models/user.py | 13 +++++++++++-- common/djangoapps/student/tests/tests.py | 11 +++++++++++ lms/envs/common.py | 11 +++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/cms/envs/common.py b/cms/envs/common.py index d5e561aa9cf9..1d7adfff9175 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -531,6 +531,17 @@ # .. toggle_creation_date: 2023-03-31 # .. toggle_tickets: https://github.com/openedx/edx-platform/pull/32015 'DISABLE_ADVANCED_SETTINGS': False, + + # .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID'] + # .. toggle_implementation: DjangoSetting + # .. toggle_default: False + # .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user id + # instead of the newer SHAKE128 hashing algorithm + # .. toggle_use_cases: open_edx + # .. toggle_creation_date: 2022-08-08 + # .. toggle_target_removal_date: None + # .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832' + 'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False, } # .. toggle_name: ENABLE_COPPA_COMPLIANCE diff --git a/common/djangoapps/student/models/user.py b/common/djangoapps/student/models/user.py index 2a4a9deb3664..cb806fed4467 100644 --- a/common/djangoapps/student/models/user.py +++ b/common/djangoapps/student/models/user.py @@ -154,12 +154,21 @@ def anonymous_id_for_user(user, course_id): # function: Rotate at will, since the hashes are stored and # will not change. # include the secret key as a salt, and to make the ids unique across different LMS installs. - hasher = hashlib.shake_128() + legacy_hash_enabled = settings.FEATURES.get('ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID', False) + if legacy_hash_enabled: + # Use legacy MD5 algorithm if flag enabled + hasher = hashlib.md5() + else: + hasher = hashlib.shake_128() hasher.update(settings.SECRET_KEY.encode('utf8')) hasher.update(str(user.id).encode('utf8')) if course_id: hasher.update(str(course_id).encode('utf-8')) - anonymous_user_id = hasher.hexdigest(16) + + if legacy_hash_enabled: + anonymous_user_id = hasher.hexdigest() + else: + anonymous_user_id = hasher.hexdigest(16) # pylint: disable=too-many-function-args try: AnonymousUserId.objects.create( diff --git a/common/djangoapps/student/tests/tests.py b/common/djangoapps/student/tests/tests.py index b41ad2f856d6..735706304e11 100644 --- a/common/djangoapps/student/tests/tests.py +++ b/common/djangoapps/student/tests/tests.py @@ -1050,6 +1050,17 @@ def test_anonymous_id_secret_key_changes_result_in_diff_values_for_same_new_user assert anonymous_id != new_anonymous_id assert self.user == user_by_anonymous_id(new_anonymous_id) + def test_enable_legacy_hash_flag(self): + """Test that different anonymous id returned if ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID enabled.""" + CourseEnrollment.enroll(self.user, self.course.id) + anonymous_id = anonymous_id_for_user(self.user, self.course.id) + with patch.dict(settings.FEATURES, ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID=True): + # Recreate user object to clear cached anonymous id. + self.user = User.objects.get(pk=self.user.id) + AnonymousUserId.objects.filter(user=self.user).filter(course_id=self.course.id).delete() + new_anonymous_id = anonymous_id_for_user(self.user, self.course.id) + assert anonymous_id != new_anonymous_id + @skip_unless_lms @patch('openedx.core.djangoapps.programs.utils.get_programs') diff --git a/lms/envs/common.py b/lms/envs/common.py index 13e031c76175..f3e3604241da 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -1039,6 +1039,17 @@ # .. toggle_creation_date: 2022-06-06 # .. toggle_tickets: 'https://github.com/edx/edx-platform/pull/29538' 'DISABLE_ALLOWED_ENROLLMENT_IF_ENROLLMENT_CLOSED': False, + + # .. toggle_name: FEATURES['ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID'] + # .. toggle_implementation: DjangoSetting + # .. toggle_default: False + # .. toggle_description: Whether to enable the legacy MD5 hashing algorithm to generate anonymous user id + # instead of the newer SHAKE128 hashing algorithm + # .. toggle_use_cases: open_edx + # .. toggle_creation_date: 2022-08-08 + # .. toggle_target_removal_date: None + # .. toggle_tickets: 'https://github.com/openedx/edx-platform/pull/30832' + 'ENABLE_LEGACY_MD5_HASH_FOR_ANONYMOUS_USER_ID': False, } # Specifies extra XBlock fields that should available when requested via the Course Blocks API