From 824864f4fcf7be1a27db27f9f75814831c55fc9f Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 5 Feb 2025 11:49:00 -0800 Subject: [PATCH 1/7] Fix name copy-pasta --- .../experiments/test/test_experiment_query_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py index a23628767a6a7..2092343c76b90 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py @@ -21,7 +21,7 @@ @override_settings(IN_UNIT_TESTING=True) -class TestExperimentTrendsQueryRunner(ClickhouseTestMixin, APIBaseTest): +class TestExperimentQueryRunner(ClickhouseTestMixin, APIBaseTest): def create_feature_flag(self, key="test-experiment"): return FeatureFlag.objects.create( name=f"Test experiment flag: {key}", From 8eb3650084f4f1b0cb851277ad0b53437036c350 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 5 Feb 2025 12:01:10 -0800 Subject: [PATCH 2/7] Add some tests for internal filters --- .../test/test_experiment_query_runner.py | 266 ++++++++++++++++++ 1 file changed, 266 insertions(+) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py index 2092343c76b90..feedd42c8a716 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py @@ -1,15 +1,22 @@ +from typing import cast from django.test import override_settings from posthog.hogql_queries.experiments.experiment_query_runner import ExperimentQueryRunner +from posthog.models.cohort.cohort import Cohort from posthog.models.feature_flag.feature_flag import FeatureFlag +from posthog.models.group_type_mapping import GroupTypeMapping +from posthog.models.group.util import create_group from posthog.schema import ( EventsNode, ExperimentSignificanceCode, ExperimentTrendsQuery, + ExperimentTrendsQueryResponse, TrendsQuery, ) from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, + _create_event, + _create_person, flush_persons_and_events, ) from freezegun import freeze_time @@ -18,6 +25,7 @@ from posthog.test.test_journeys import journeys_for from posthog.models.experiment import Experiment from flaky import flaky +from parameterized import parameterized @override_settings(IN_UNIT_TESTING=True) @@ -196,3 +204,261 @@ def test_query_runner_standard_flow_v2_stats(self): self.assertEqual(test_variant.count, 5.0) # In the new query runner, the exposure value is the same as the absolute exposure value self.assertEqual(test_variant.exposure, 2.0) + + @parameterized.expand( + [ + [ + "person_properties", + { + "key": "email", + "value": "@posthog.com", + "operator": "not_icontains", + "type": "person", + }, + { + "control_absolute_exposure": 12, + "test_absolute_exposure": 15, + }, + ], + [ + "event_properties", + { + "key": "$host", + "value": "^(localhost|127\\.0\\.0\\.1)($|:)", + "operator": "not_regex", + "type": "event", + }, + { + "control_absolute_exposure": 6, + "test_absolute_exposure": 6, + }, + ], + [ + "feature_flags", + { + "key": "$feature/flag_doesnt_exist", + "type": "event", + "value": ["test", "control"], + "operator": "exact", + }, + { + "control_absolute_exposure": 0, + "test_absolute_exposure": 0, + }, + ], + [ + "cohort_static", + { + "key": "id", + "type": "static-cohort", + # value is generated in the test + "value": None, + "operator": "exact", + }, + { + "control_absolute_exposure": 2, + "test_absolute_exposure": 1, + }, + ], + [ + "cohort_dynamic", + { + "key": "id", + "type": "cohort", + # value is generated in the test + "value": None, + "operator": "exact", + }, + { + "control_absolute_exposure": 2, + "test_absolute_exposure": 1, + }, + ], + [ + "group", + { + "key": "name", + "type": "group", + # Value is generated in the test + "value": None, + "operator": "exact", + "group_type_index": 0, + }, + { + "control_absolute_exposure": 8, + "test_absolute_exposure": 10, + }, + ], + [ + "element", + { + "key": "tag_name", + "type": "element", + "value": ["button"], + "operator": "exact", + }, + { + "control_absolute_exposure": 0, + "test_absolute_exposure": 0, + }, + ], + ] + ) + def test_query_runner_with_internal_filters(self, name: str, filter: dict, expected_results: dict): + feature_flag = self.create_feature_flag() + experiment = self.create_experiment(feature_flag=feature_flag) + + cohort = None + if name == "cohort_static": + cohort = Cohort.objects.create( + team=self.team, + name="cohort_static", + is_static=True, + ) + filter["value"] = cohort.pk + elif name == "cohort_dynamic": + cohort = Cohort.objects.create( + team=self.team, + name="cohort_dynamic", + groups=[ + { + "properties": [ + {"key": "email", "operator": "not_icontains", "value": "@posthog.com", "type": "person"}, + ] + } + ], + ) + filter["value"] = cohort.pk + elif name == "group": + GroupTypeMapping.objects.create( + team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 + ) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="my_awesome_group", + properties={"name": "Test Group"}, + ) + filter["value"] = ["Test Group"] + + self.team.test_account_filters = [filter] + self.team.save() + + feature_flag_property = f"$feature/{feature_flag.key}" + count_query = TrendsQuery(series=[EventsNode(event="$pageview")], filterTestAccounts=True) + + experiment_query = ExperimentTrendsQuery( + experiment_id=experiment.id, + kind="ExperimentTrendsQuery", + count_query=count_query, + exposure_query=None, + ) + + experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] + experiment.save() + + # Populate count events + for variant, count in [("control", 7), ("test", 9)]: + for i in range(count): + extra_properties = {"$host": "localhost", "$group_0": "my_awesome_group"} if i > 5 else {} + _create_event( + team=self.team, + event="$pageview", + distinct_id=f"user_{variant}_{i}", + properties={feature_flag_property: variant, **extra_properties}, + ) + + # Populate exposure events + for variant, count in [("control", 14), ("test", 16)]: + for i in range(count): + extra_properties = {"$host": "localhost", "$group_0": "my_awesome_group"} if i > 5 else {} + _create_event( + team=self.team, + event="$feature_flag_called", + distinct_id=f"user_{variant}_{i}", + properties={ + "$feature_flag_response": variant, + "$feature_flag": feature_flag.key, + **extra_properties, + }, + ) + + _create_person( + team=self.team, + distinct_ids=["user_control_1"], + ) + _create_person( + team=self.team, + distinct_ids=["user_control_2"], + ) + _create_person( + team=self.team, + distinct_ids=["user_control_3"], + properties={"email": "user_control_3@posthog.com"}, + ) + _create_person( + team=self.team, + distinct_ids=["user_control_6"], + properties={"email": "user_control_6@posthog.com"}, + ) + _create_person( + team=self.team, + distinct_ids=["user_test_2"], + properties={"email": "user_test_2@posthog.com"}, + ) + _create_person( + team=self.team, + distinct_ids=["user_test_3"], + ) + + flush_persons_and_events() + + if name == "cohort_static" and cohort: + cohort.insert_users_by_list(["user_control_1", "user_control_2", "user_test_2"]) + self.assertEqual(cohort.people.count(), 3) + elif name == "cohort_dynamic" and cohort: + cohort.calculate_people_ch(pending_version=0) + + query_runner = ExperimentQueryRunner( + query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team + ) + # "feature_flags" and "element" filter out all events + if name == "feature_flags" or name == "element": + with self.assertRaises(ValueError) as context: + query_runner.calculate() + + self.assertEqual(context.exception.args[0], "Control variant not found in experiment results") + else: + result = query_runner.calculate() + trend_result = cast(ExperimentTrendsQueryResponse, result) + + control_result = next(variant for variant in trend_result.variants if variant.key == "control") + test_result = next(variant for variant in trend_result.variants if variant.key == "test") + + self.assertEqual(control_result.absolute_exposure, expected_results["control_absolute_exposure"]) + self.assertEqual(test_result.absolute_exposure, expected_results["test_absolute_exposure"]) + + ## Run again with filterTestAccounts=False + count_query = TrendsQuery(series=[EventsNode(event="$pageview")], filterTestAccounts=False) + experiment_query = ExperimentTrendsQuery( + experiment_id=experiment.id, + kind="ExperimentTrendsQuery", + count_query=count_query, + exposure_query=None, + ) + + experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] + experiment.save() + + query_runner = ExperimentQueryRunner( + query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team + ) + result = query_runner.calculate() + + trend_result = cast(ExperimentTrendsQueryResponse, result) + + control_result = next(variant for variant in trend_result.variants if variant.key == "control") + test_result = next(variant for variant in trend_result.variants if variant.key == "test") + + self.assertEqual(control_result.absolute_exposure, 14) + self.assertEqual(test_result.absolute_exposure, 16) From 5f443c46bced8e253944677433e17ea8a63ed886 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 5 Feb 2025 12:26:56 -0800 Subject: [PATCH 3/7] Add query snapshots --- .../test_experiment_query_runner.ambr | 810 ++++++++++++++++++ .../test/test_experiment_query_runner.py | 3 + 2 files changed, 813 insertions(+) create mode 100644 posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr diff --git a/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr b/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr new file mode 100644 index 0000000000000..02e138fcab2cc --- /dev/null +++ b/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr @@ -0,0 +1,810 @@ +# serializer version: 1 +# name: TestExperimentQueryRunner.test_query_runner_standard_flow_v2_stats + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_0_person_properties + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + FROM person + WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(notILike(events__person.properties___email, '%@posthog.com%'), 1)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + FROM person + WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(notILike(events__person.properties___email, '%@posthog.com%'), 1)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_0_person_properties.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_1_event_properties + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(not(match(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$host'), ''), 'null'), '^"|"$', '')), '^(localhost|127\\.0\\.0\\.1)($|:)')), 1)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(not(match(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$host'), ''), 'null'), '^"|"$', '')), '^(localhost|127\\.0\\.0\\.1)($|:)')), 1)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_1_event_properties.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_2_feature_flags + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'test'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'control'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'test'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'control'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_2_feature_flags.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_3_cohort_static + ''' + + SELECT count(DISTINCT person_id) + FROM person_static_cohort + WHERE team_id = 99999 + AND cohort_id = 99999 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_3_cohort_static.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999)))), 0)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999)))), 0)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_3_cohort_static.2 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = NULL + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.1 + ''' + /* cohort_calculation: */ + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.2 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), 0)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), 0)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_4_cohort_dynamic.3 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_5_group + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS events__group_0 ON equals(events.`$group_0`, events__group_0.key) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(equals(events__group_0.properties___name, 'Test Group'), 0)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS events__group_0 ON equals(events.`$group_0`, events__group_0.key) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(equals(events__group_0.properties___name, 'Test Group'), 0)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_5_group.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_6_element + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), match(events.elements_chain, '(^|;)button(\\.|$|;|:)')) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), match(events.elements_chain, '(^|;)button(\\.|$|;|:)')) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_6_element.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.distinct_id AS distinct_id, + exposure.variant AS variant, + events.event AS event, + 1 AS value + FROM events + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(events.distinct_id, exposure.distinct_id) + WHERE and(equals(events.team_id, 99999), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), exposure.first_exposure_time), equals(events.event, '$pageview'))) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- diff --git a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py index feedd42c8a716..b144be40ffde0 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py @@ -18,6 +18,7 @@ _create_event, _create_person, flush_persons_and_events, + snapshot_clickhouse_queries, ) from freezegun import freeze_time from django.utils import timezone @@ -82,6 +83,7 @@ def create_experiment( @flaky(max_runs=10, min_passes=1) @freeze_time("2020-01-01T12:00:00Z") + @snapshot_clickhouse_queries def test_query_runner_standard_flow_v2_stats(self): feature_flag = self.create_feature_flag() experiment = self.create_experiment(feature_flag=feature_flag) @@ -304,6 +306,7 @@ def test_query_runner_standard_flow_v2_stats(self): ], ] ) + @snapshot_clickhouse_queries def test_query_runner_with_internal_filters(self, name: str, filter: dict, expected_results: dict): feature_flag = self.create_feature_flag() experiment = self.create_experiment(feature_flag=feature_flag) From bbf865e4510a94c01ad15ba92085155f08f8c9df Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 5 Feb 2025 12:59:15 -0800 Subject: [PATCH 4/7] First pass at data warehouse support --- .../experiments/experiment_query_runner.py | 118 ++- .../test_experiment_query_runner.ambr | 777 ++++++++++++++++++ .../test/test_experiment_query_runner.py | 436 +++++++++- 3 files changed, 1294 insertions(+), 37 deletions(-) diff --git a/posthog/hogql_queries/experiments/experiment_query_runner.py b/posthog/hogql_queries/experiments/experiment_query_runner.py index 22795683371a8..7b34b8f3bae6a 100644 --- a/posthog/hogql_queries/experiments/experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/experiment_query_runner.py @@ -27,6 +27,7 @@ from rest_framework.exceptions import ValidationError from posthog.schema import ( CachedExperimentTrendsQueryResponse, + DataWarehouseNode, ExperimentSignificanceCode, ExperimentTrendsQuery, ExperimentTrendsQueryResponse, @@ -106,15 +107,17 @@ def _get_experiment_query(self) -> ast.SelectQuery: feature_flag_key = self.feature_flag.key - # Get the metric event we should filter on - metric_event = self.query.count_query.series[0].event + is_data_warehouse_query = isinstance(self.query.count_query.series[0], DataWarehouseNode) # Pick the correct value for the aggregation chosen match self._get_metric_type(): case ExperimentMetricType.CONTINUOUS: # If the metric type is continuous, we need to extract the value from the event property metric_property = self.query.count_query.series[0].math_property - metric_value = f"toFloat(JSONExtractRaw(properties, '{metric_property}'))" + if is_data_warehouse_query: + metric_value = f"toFloat('{metric_property}')" + else: + metric_value = f"toFloat(JSONExtractRaw(properties, '{metric_property}'))" case _: # Else, we default to count # We then just emit 1 so we can easily sum it up @@ -152,43 +155,86 @@ def _get_experiment_query(self) -> ast.SelectQuery: group_by=[ast.Field(chain=["variant"]), ast.Field(chain=["distinct_id"])], ) - # Metric events seen after exposure - # One row per event - events_after_exposure_query = ast.SelectQuery( - select=[ - ast.Field(chain=["events", "timestamp"]), - ast.Field(chain=["events", "distinct_id"]), - ast.Field(chain=["exposure", "variant"]), - ast.Field(chain=["events", "event"]), - parse_expr(f"{metric_value} as value"), - ], - select_from=ast.JoinExpr( - table=ast.Field(chain=["events"]), - next_join=ast.JoinExpr( - table=exposure_query, - join_type="INNER JOIN", - alias="exposure", - constraint=ast.JoinConstraint( - expr=ast.CompareOperation( - left=ast.Field(chain=["events", "distinct_id"]), - right=ast.Field(chain=["exposure", "distinct_id"]), - op=ast.CompareOperationOp.Eq, + if is_data_warehouse_query: + series_node = self.query.count_query.series[0] + events_after_exposure_query = ast.SelectQuery( + select=[ + ast.Alias( + alias="timestamp", expr=ast.Field(chain=[series_node.table_name, series_node.timestamp_field]) + ), + ast.Alias( + alias="distinct_id", + expr=ast.Field(chain=[series_node.table_name, series_node.distinct_id_field]), + ), + ast.Field(chain=["exposure", "variant"]), + parse_expr(f"{metric_value} as value"), + ], + select_from=ast.JoinExpr( + table=ast.Field(chain=[series_node.table_name]), + next_join=ast.JoinExpr( + table=exposure_query, + join_type="INNER JOIN", + alias="exposure", + constraint=ast.JoinConstraint( + expr=ast.CompareOperation( + left=ast.Field(chain=[series_node.table_name, series_node.distinct_id_field]), + right=ast.Field(chain=["exposure", "distinct_id"]), + op=ast.CompareOperationOp.Eq, + ), + constraint_type="ON", ), - constraint_type="ON", ), ), - ), - where=ast.And( - exprs=[ - ast.CompareOperation( - left=ast.Field(chain=["events", "timestamp"]), - right=ast.Field(chain=["exposure", "first_exposure_time"]), - op=ast.CompareOperationOp.GtEq, - ), - parse_expr(f"event = '{metric_event}'"), + where=ast.And( + exprs=[ + ast.CompareOperation( + left=ast.Field(chain=[series_node.table_name, series_node.timestamp_field]), + right=ast.Field(chain=["exposure", "first_exposure_time"]), + op=ast.CompareOperationOp.GtEq, + ), + # :TODO: Figure out if we actually need this + # parse_expr(f"event = '{self.query.count_query.series[0].event}'"), + ], + ), + ) + else: + # Metric events seen after exposure + # One row per event + events_after_exposure_query = ast.SelectQuery( + select=[ + ast.Field(chain=["events", "timestamp"]), + ast.Field(chain=["events", "distinct_id"]), + ast.Field(chain=["exposure", "variant"]), + ast.Field(chain=["events", "event"]), + parse_expr(f"{metric_value} as value"), ], - ), - ) + select_from=ast.JoinExpr( + table=ast.Field(chain=["events"]), + next_join=ast.JoinExpr( + table=exposure_query, + join_type="INNER JOIN", + alias="exposure", + constraint=ast.JoinConstraint( + expr=ast.CompareOperation( + left=ast.Field(chain=["events", "distinct_id"]), + right=ast.Field(chain=["exposure", "distinct_id"]), + op=ast.CompareOperationOp.Eq, + ), + constraint_type="ON", + ), + ), + ), + where=ast.And( + exprs=[ + ast.CompareOperation( + left=ast.Field(chain=["events", "timestamp"]), + right=ast.Field(chain=["exposure", "first_exposure_time"]), + op=ast.CompareOperationOp.GtEq, + ), + parse_expr(f"event = '{self.query.count_query.series[0].event}'"), + ], + ), + ) metrics_aggregated_per_entity_query = ast.SelectQuery( select=[ diff --git a/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr b/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr index 02e138fcab2cc..5aa00d6278085 100644 --- a/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr +++ b/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr @@ -45,6 +45,783 @@ max_bytes_before_external_group_by=0 ''' # --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_0_person_properties + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + FROM person + WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(notILike(events__person.properties___email, '%@posthog.com%'), 1)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + FROM person + WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(notILike(events__person.properties___email, '%@posthog.com%'), 1)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_0_person_properties.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_1_event_properties + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(not(match(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$host'), ''), 'null'), '^"|"$', '')), '^(localhost|127\\.0\\.0\\.1)($|:)')), 1)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(not(match(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$host'), ''), 'null'), '^"|"$', '')), '^(localhost|127\\.0\\.0\\.1)($|:)')), 1)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_1_event_properties.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_2_feature_flags + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'test'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'control'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'test'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$feature/flag_doesnt_exist'), ''), 'null'), '^"|"$', ''), 'control'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_2_feature_flags.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_3_cohort_static + ''' + + SELECT count(DISTINCT person_id) + FROM person_static_cohort + WHERE team_id = 99999 + AND cohort_id = 99999 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_3_cohort_static.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999)))), 0)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 99999), equals(person_static_cohort.cohort_id, 99999)))), 0)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_3_cohort_static.2 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = NULL + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.1 + ''' + /* cohort_calculation: */ + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 99999 + AND cohort_id = 99999 + AND version = 0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.2 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), 0)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(in(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 99999), equals(cohortpeople.cohort_id, 99999), equals(cohortpeople.version, 0)))), 0)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_4_cohort_dynamic.3 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_5_group + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS events__group_0 ON equals(events.`$group_0`, events__group_0.key) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(equals(events__group_0.properties___name, 'Test Group'), 0)) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS events__group_0 ON equals(events.`$group_0`, events__group_0.key) + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), ifNull(equals(events__group_0.properties___name, 'Test Group'), 0)) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_5_group.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_6_element + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), match(events.elements_chain, '(^|;)button(\\.|$|;|:)')) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0)), match(events.elements_chain, '(^|;)button(\\.|$|;|:)')) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestExperimentQueryRunner.test_query_runner_with_data_warehouse_internal_filters_6_element.1 + ''' + SELECT maq.variant AS variant, + count(maq.distinct_id) AS num_users, + sum(maq.value) AS total_sum, + sum(power(maq.value, 2)) AS total_sum_of_squares + FROM + (SELECT base.variant AS variant, + base.distinct_id AS distinct_id, + sum(coalesce(eae.value, 0)) AS value + FROM + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS base + LEFT JOIN + (SELECT usage.ds AS timestamp, + usage.userid AS distinct_id, + exposure.variant AS variant, + accurateCastOrNull('usage', 'Float64') AS value + FROM + (SELECT * + FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.experiments.queryrunner/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', '`ds` Date, `id` String, `usage` Int64, `userid` String')) AS usage + INNER JOIN + (SELECT events.distinct_id AS distinct_id, + replaceAll(JSONExtractRaw(events.properties, '$feature_flag_response'), '"', '') AS variant, + min(toTimeZone(events.timestamp, 'UTC')) AS first_exposure_time + FROM events + WHERE and(equals(events.team_id, 99999), and(equals(events.event, '$feature_flag_called'), ifNull(equals(replaceAll(JSONExtractRaw(events.properties, '$feature_flag'), '"', ''), 'test-experiment'), 0))) + GROUP BY variant, + events.distinct_id) AS exposure ON equals(usage.userid, exposure.distinct_id) + WHERE ifNull(greaterOrEquals(usage.ds, exposure.first_exposure_time), 0)) AS eae ON and(equals(base.distinct_id, eae.distinct_id), equals(base.variant, eae.variant)) + GROUP BY base.variant, + base.distinct_id) AS maq + GROUP BY maq.variant + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- # name: TestExperimentQueryRunner.test_query_runner_with_internal_filters_0_person_properties ''' SELECT maq.variant AS variant, diff --git a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py index b144be40ffde0..4aeba4edc721e 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py @@ -5,7 +5,15 @@ from posthog.models.feature_flag.feature_flag import FeatureFlag from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.group.util import create_group +from posthog.settings import ( + OBJECT_STORAGE_ACCESS_KEY_ID, + OBJECT_STORAGE_BUCKET, + OBJECT_STORAGE_ENDPOINT, + OBJECT_STORAGE_SECRET_ACCESS_KEY, + XDIST_SUFFIX, +) from posthog.schema import ( + DataWarehouseNode, EventsNode, ExperimentSignificanceCode, ExperimentTrendsQuery, @@ -22,15 +30,37 @@ ) from freezegun import freeze_time from django.utils import timezone -from datetime import timedelta +from datetime import datetime, timedelta from posthog.test.test_journeys import journeys_for from posthog.models.experiment import Experiment from flaky import flaky from parameterized import parameterized +import s3fs +from pyarrow import parquet as pq +import pyarrow as pa +from boto3 import resource +from botocore.config import Config +from posthog.warehouse.models.credential import DataWarehouseCredential +from posthog.warehouse.models.join import DataWarehouseJoin +from posthog.warehouse.models.table import DataWarehouseTable + +TEST_BUCKET = "test_storage_bucket-posthog.hogql.experiments.queryrunner" + XDIST_SUFFIX @override_settings(IN_UNIT_TESTING=True) class TestExperimentQueryRunner(ClickhouseTestMixin, APIBaseTest): + def teardown_method(self, method) -> None: + s3 = resource( + "s3", + endpoint_url=OBJECT_STORAGE_ENDPOINT, + aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID, + aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY, + config=Config(signature_version="s3v4"), + region_name="us-east-1", + ) + bucket = s3.Bucket(OBJECT_STORAGE_BUCKET) + bucket.objects.filter(Prefix=TEST_BUCKET).delete() + def create_feature_flag(self, key="test-experiment"): return FeatureFlag.objects.create( name=f"Test experiment flag: {key}", @@ -81,6 +111,71 @@ def create_experiment( end_date=end_date, ) + def create_data_warehouse_table_with_usage(self): + if not OBJECT_STORAGE_ACCESS_KEY_ID or not OBJECT_STORAGE_SECRET_ACCESS_KEY: + raise Exception("Missing vars") + + fs = s3fs.S3FileSystem( + client_kwargs={ + "region_name": "us-east-1", + "endpoint_url": OBJECT_STORAGE_ENDPOINT, + "aws_access_key_id": OBJECT_STORAGE_ACCESS_KEY_ID, + "aws_secret_access_key": OBJECT_STORAGE_SECRET_ACCESS_KEY, + }, + ) + + path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}" + + table_data = [ + {"id": "1", "ds": "2023-01-01", "userid": "user_control_0", "usage": 1000}, + {"id": "2", "ds": "2023-01-02", "userid": "user_test_1", "usage": 500}, + {"id": "3", "ds": "2023-01-03", "userid": "user_test_2", "usage": 750}, + {"id": "4", "ds": "2023-01-04", "userid": "internal_test_1", "usage": 100000}, + {"id": "5", "ds": "2023-01-06", "userid": "user_test_3", "usage": 800}, + {"id": "6", "ds": "2023-01-07", "userid": "user_extra", "usage": 900}, + ] + + pq.write_to_dataset( + pa.Table.from_pylist(table_data), + path_to_s3_object, + filesystem=fs, + use_dictionary=True, + compression="snappy", + ) + + table_name = "usage" + + credential = DataWarehouseCredential.objects.create( + access_key=OBJECT_STORAGE_ACCESS_KEY_ID, + access_secret=OBJECT_STORAGE_SECRET_ACCESS_KEY, + team=self.team, + ) + + DataWarehouseTable.objects.create( + name=table_name, + url_pattern=f"http://host.docker.internal:19000/{OBJECT_STORAGE_BUCKET}/{TEST_BUCKET}/*.parquet", + format=DataWarehouseTable.TableFormat.Parquet, + team=self.team, + columns={ + "id": "String", + "ds": "Date", + "userid": "String", + "usage": "Int64", + }, + credential=credential, + ) + + DataWarehouseJoin.objects.create( + team=self.team, + source_table_name=table_name, + source_table_key="userid", + joining_table_name="events", + joining_table_key="properties.$user_id", + field_name="events", + configuration={"experiments_optimized": True, "experiments_timestamp_key": "ds"}, + ) + return table_name + @flaky(max_runs=10, min_passes=1) @freeze_time("2020-01-01T12:00:00Z") @snapshot_clickhouse_queries @@ -465,3 +560,342 @@ def test_query_runner_with_internal_filters(self, name: str, filter: dict, expec self.assertEqual(control_result.absolute_exposure, 14) self.assertEqual(test_result.absolute_exposure, 16) + + @parameterized.expand( + [ + [ + "person_properties", + { + "key": "email", + "value": "@posthog.com", + "operator": "not_icontains", + "type": "person", + }, + { + "control_absolute_exposure": 8, + "test_absolute_exposure": 9, + }, + ], + [ + "event_properties", + { + "key": "$host", + "value": "^(localhost|127\\.0\\.0\\.1)($|:)", + "operator": "not_regex", + "type": "event", + }, + { + "control_absolute_exposure": 8, + "test_absolute_exposure": 9, + }, + ], + [ + "feature_flags", + { + "key": "$feature/flag_doesnt_exist", + "type": "event", + "value": ["test", "control"], + "operator": "exact", + }, + { + "control_absolute_exposure": 0, + "test_absolute_exposure": 0, + }, + ], + [ + "cohort_static", + { + "key": "id", + "type": "cohort", + # value is generated in the test + "value": None, + "operator": "exact", + }, + { + "control_absolute_exposure": 1, + "test_absolute_exposure": 1, + }, + ], + [ + "cohort_dynamic", + { + "key": "id", + "type": "cohort", + # value is generated in the test + "value": None, + "operator": "exact", + }, + { + "control_absolute_exposure": 2, + "test_absolute_exposure": 1, + }, + ], + [ + "group", + { + "key": "name", + "type": "group", + # Value is generated in the test + "value": None, + "operator": "exact", + "group_type_index": 0, + }, + { + "control_absolute_exposure": 7, + "test_absolute_exposure": 9, + }, + ], + [ + "element", + { + "key": "tag_name", + "type": "element", + "value": ["button"], + "operator": "exact", + }, + { + "control_absolute_exposure": 0, + "test_absolute_exposure": 0, + }, + ], + ] + ) + @snapshot_clickhouse_queries + def test_query_runner_with_data_warehouse_internal_filters(self, name, filter: dict, filter_expected: dict): + table_name = self.create_data_warehouse_table_with_usage() + + feature_flag = self.create_feature_flag() + experiment = self.create_experiment( + feature_flag=feature_flag, + start_date=datetime(2023, 1, 1), + ) + + feature_flag_property = f"$feature/{feature_flag.key}" + + cohort = None + if name == "cohort_static": + cohort = Cohort.objects.create( + team=self.team, + name="cohort_static", + is_static=True, + ) + filter["value"] = cohort.pk + elif name == "cohort_dynamic": + cohort = Cohort.objects.create( + team=self.team, + name="cohort_dynamic", + groups=[ + { + "properties": [ + {"key": "email", "operator": "not_icontains", "value": "@posthog.com", "type": "person"} + ] + } + ], + ) + filter["value"] = cohort.pk + elif name == "group": + GroupTypeMapping.objects.create( + team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 + ) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="my_awesome_group", + properties={"name": "Test Group"}, + ) + filter["value"] = ["Test Group"] + + self.team.test_account_filters = [filter] + self.team.save() + count_query = TrendsQuery( + series=[ + DataWarehouseNode( + id=table_name, + distinct_id_field="userid", + id_field="id", + table_name=table_name, + timestamp_field="ds", + math="avg", + math_property="usage", + math_property_type="data_warehouse_properties", + ) + ], + filterTestAccounts=True, + ) + exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=True) + + experiment_query = ExperimentTrendsQuery( + experiment_id=experiment.id, + kind="ExperimentTrendsQuery", + count_query=count_query, + exposure_query=exposure_query, + ) + + experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] + experiment.save() + + # Populate exposure events + for variant, count in [("control", 7), ("test", 9)]: + for i in range(count): + _create_event( + team=self.team, + event="$feature_flag_called", + distinct_id=f"distinct_{variant}_{i}", + properties={ + "$feature_flag_response": variant, + feature_flag_property: variant, + "$feature_flag": feature_flag.key, + "$user_id": f"user_{variant}_{i}", + "$group_0": "my_awesome_group", + }, + timestamp=datetime(2023, 1, i + 1), + ) + + _create_person( + team=self.team, + distinct_ids=["distinct_control_0"], + ) + + _create_person( + team=self.team, + distinct_ids=["distinct_test_3"], + ) + + _create_person( + team=self.team, + distinct_ids=["internal_test_1"], + properties={"email": "internal_test_1@posthog.com"}, + ) + # 10th exposure for 'test' + # filtered out by "event_properties" , "person_properties", and "group" + _create_event( + team=self.team, + event="$feature_flag_called", + distinct_id="internal_test_1", + properties={ + feature_flag_property: "test", + "$feature_flag_response": "test", + "$feature_flag": feature_flag.key, + "$user_id": "internal_test_1", + "$host": "127.0.0.1", + }, + timestamp=datetime(2023, 1, 3), + ) + + # "user_test_3" first exposure (feature_flag_property="control") is on 2023-01-03 + # "user_test_3" relevant exposure (feature_flag_property="test") is on 2023-01-04 + # "user_test_3" other event (feature_flag_property="control" is on 2023-01-05 + # "user_test_3" purchase is on 2023-01-06 + # "user_test_3" second exposure (feature_flag_property="control") is on 2023-01-09 + # "user_test_3" should fall into the "test" variant, not the "control" variant + _create_event( + team=self.team, + event="$feature_flag_called", + distinct_id="distinct_test_3", + properties={ + "$feature_flag_response": "control", + feature_flag_property: "control", + "$feature_flag": feature_flag.key, + "$user_id": "user_test_3", + }, + timestamp=datetime(2023, 1, 3), + ) + _create_event( + team=self.team, + event="Some other event", + distinct_id="distinct_test_3", + properties={ + "$feature_flag_response": "control", + feature_flag_property: "control", + "$feature_flag": feature_flag.key, + "$user_id": "user_test_3", + }, + timestamp=datetime(2023, 1, 5), + ) + _create_event( + team=self.team, + event="$feature_flag_called", + distinct_id="distinct_test_3", + properties={ + "$feature_flag_response": "control", + feature_flag_property: "control", + "$feature_flag": feature_flag.key, + "$user_id": "user_test_3", + }, + timestamp=datetime(2023, 1, 9), + ) + + flush_persons_and_events() + + if name == "cohort_static" and cohort: + cohort.insert_users_by_list(["distinct_control_0", "internal_test_1"]) + self.assertEqual(cohort.people.count(), 2) + elif name == "cohort_dynamic" and cohort: + cohort.calculate_people_ch(pending_version=0) + + query_runner = ExperimentQueryRunner( + query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team + ) + # "feature_flags" and "element" filter out all events + if name == "feature_flags" or name == "element": + with freeze_time("2023-01-07"), self.assertRaises(ValueError) as context: + query_runner.calculate() + + self.assertEqual(context.exception.args[0], "Control variant not found in experiment results") + else: + with freeze_time("2023-01-07"): + result = query_runner.calculate() + + trend_result = cast(ExperimentTrendsQueryResponse, result) + + self.assertEqual(len(result.variants), 2) + + control_result = next(variant for variant in trend_result.variants if variant.key == "control") + test_result = next(variant for variant in trend_result.variants if variant.key == "test") + + self.assertEqual(control_result.absolute_exposure, filter_expected["control_absolute_exposure"]) + self.assertEqual(test_result.absolute_exposure, filter_expected["test_absolute_exposure"]) + + # Run the query again without filtering + count_query = TrendsQuery( + series=[ + DataWarehouseNode( + id=table_name, + distinct_id_field="userid", + id_field="id", + table_name=table_name, + timestamp_field="ds", + math="avg", + math_property="usage", + math_property_type="data_warehouse_properties", + ) + ], + filterTestAccounts=False, + ) + exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=False) + + experiment_query = ExperimentTrendsQuery( + experiment_id=experiment.id, + kind="ExperimentTrendsQuery", + count_query=count_query, + exposure_query=exposure_query, + ) + + experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] + experiment.save() + + query_runner = ExperimentQueryRunner( + query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team + ) + with freeze_time("2023-01-07"): + result = query_runner.calculate() + + trend_result = cast(ExperimentTrendsQueryResponse, result) + + self.assertEqual(len(result.variants), 2) + + control_result = next(variant for variant in trend_result.variants if variant.key == "control") + test_result = next(variant for variant in trend_result.variants if variant.key == "test") + + self.assertEqual(control_result.absolute_exposure, 8) + self.assertEqual(test_result.absolute_exposure, 10) From 49b39b53663cd899cca58e3f5db5ee99e2d2233d Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 5 Feb 2025 13:04:13 -0800 Subject: [PATCH 5/7] Stub `ExperimentQuery` --- .../experiments/experiment_query_runner.py | 4 +- .../test/test_experiment_query_runner.py | 42 +++++++------------ posthog/schema.py | 15 +++++++ 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/posthog/hogql_queries/experiments/experiment_query_runner.py b/posthog/hogql_queries/experiments/experiment_query_runner.py index 7b34b8f3bae6a..5ddd3c3a8f8c1 100644 --- a/posthog/hogql_queries/experiments/experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/experiment_query_runner.py @@ -29,7 +29,7 @@ CachedExperimentTrendsQueryResponse, DataWarehouseNode, ExperimentSignificanceCode, - ExperimentTrendsQuery, + ExperimentQuery, ExperimentTrendsQueryResponse, ExperimentVariantTrendsBaseStats, DateRange, @@ -41,7 +41,7 @@ class ExperimentQueryRunner(QueryRunner): - query: ExperimentTrendsQuery + query: ExperimentQuery response: ExperimentTrendsQueryResponse cached_response: CachedExperimentTrendsQueryResponse diff --git a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py index 4aeba4edc721e..1c33eac399623 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py @@ -15,8 +15,8 @@ from posthog.schema import ( DataWarehouseNode, EventsNode, + ExperimentQuery, ExperimentSignificanceCode, - ExperimentTrendsQuery, ExperimentTrendsQueryResponse, TrendsQuery, ) @@ -188,9 +188,9 @@ def test_query_runner_standard_flow_v2_stats(self): ff_property = f"$feature/{feature_flag.key}" count_query = TrendsQuery(series=[EventsNode(event="$pageview")]) - experiment_query = ExperimentTrendsQuery( + experiment_query = ExperimentQuery( experiment_id=experiment.id, - kind="ExperimentTrendsQuery", + kind="ExperimentQuery", count_query=count_query, exposure_query=None, ) @@ -258,9 +258,7 @@ def test_query_runner_standard_flow_v2_stats(self): flush_persons_and_events() - query_runner = ExperimentQueryRunner( - query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team - ) + query_runner = ExperimentQueryRunner(query=ExperimentQuery(**experiment.metrics[0]["query"]), team=self.team) self.assertEqual(query_runner.stats_version, 2) result = query_runner.calculate() @@ -445,9 +443,9 @@ def test_query_runner_with_internal_filters(self, name: str, filter: dict, expec feature_flag_property = f"$feature/{feature_flag.key}" count_query = TrendsQuery(series=[EventsNode(event="$pageview")], filterTestAccounts=True) - experiment_query = ExperimentTrendsQuery( + experiment_query = ExperimentQuery( experiment_id=experiment.id, - kind="ExperimentTrendsQuery", + kind="ExperimentQuery", count_query=count_query, exposure_query=None, ) @@ -517,9 +515,7 @@ def test_query_runner_with_internal_filters(self, name: str, filter: dict, expec elif name == "cohort_dynamic" and cohort: cohort.calculate_people_ch(pending_version=0) - query_runner = ExperimentQueryRunner( - query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team - ) + query_runner = ExperimentQueryRunner(query=ExperimentQuery(**experiment.metrics[0]["query"]), team=self.team) # "feature_flags" and "element" filter out all events if name == "feature_flags" or name == "element": with self.assertRaises(ValueError) as context: @@ -538,9 +534,9 @@ def test_query_runner_with_internal_filters(self, name: str, filter: dict, expec ## Run again with filterTestAccounts=False count_query = TrendsQuery(series=[EventsNode(event="$pageview")], filterTestAccounts=False) - experiment_query = ExperimentTrendsQuery( + experiment_query = ExperimentQuery( experiment_id=experiment.id, - kind="ExperimentTrendsQuery", + kind="ExperimentQuery", count_query=count_query, exposure_query=None, ) @@ -548,9 +544,7 @@ def test_query_runner_with_internal_filters(self, name: str, filter: dict, expec experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] experiment.save() - query_runner = ExperimentQueryRunner( - query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team - ) + query_runner = ExperimentQueryRunner(query=ExperimentQuery(**experiment.metrics[0]["query"]), team=self.team) result = query_runner.calculate() trend_result = cast(ExperimentTrendsQueryResponse, result) @@ -724,9 +718,9 @@ def test_query_runner_with_data_warehouse_internal_filters(self, name, filter: d ) exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=True) - experiment_query = ExperimentTrendsQuery( + experiment_query = ExperimentQuery( experiment_id=experiment.id, - kind="ExperimentTrendsQuery", + kind="ExperimentQuery", count_query=count_query, exposure_query=exposure_query, ) @@ -833,9 +827,7 @@ def test_query_runner_with_data_warehouse_internal_filters(self, name, filter: d elif name == "cohort_dynamic" and cohort: cohort.calculate_people_ch(pending_version=0) - query_runner = ExperimentQueryRunner( - query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team - ) + query_runner = ExperimentQueryRunner(query=ExperimentQuery(**experiment.metrics[0]["query"]), team=self.team) # "feature_flags" and "element" filter out all events if name == "feature_flags" or name == "element": with freeze_time("2023-01-07"), self.assertRaises(ValueError) as context: @@ -874,9 +866,9 @@ def test_query_runner_with_data_warehouse_internal_filters(self, name, filter: d ) exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=False) - experiment_query = ExperimentTrendsQuery( + experiment_query = ExperimentQuery( experiment_id=experiment.id, - kind="ExperimentTrendsQuery", + kind="ExperimentQuery", count_query=count_query, exposure_query=exposure_query, ) @@ -884,9 +876,7 @@ def test_query_runner_with_data_warehouse_internal_filters(self, name, filter: d experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] experiment.save() - query_runner = ExperimentQueryRunner( - query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team - ) + query_runner = ExperimentQueryRunner(query=ExperimentQuery(**experiment.metrics[0]["query"]), team=self.team) with freeze_time("2023-01-07"): result = query_runner.calculate() diff --git a/posthog/schema.py b/posthog/schema.py index 6c5fa6a4dea46..6b9e009353486 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -7196,6 +7196,21 @@ class ExperimentTrendsQuery(BaseModel): response: Optional[ExperimentTrendsQueryResponse] = None +class ExperimentQuery(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + count_query: TrendsQuery + experiment_id: Optional[int] = None + exposure_query: Optional[TrendsQuery] = None + kind: Literal["ExperimentQuery"] = "ExperimentQuery" + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + name: Optional[str] = None + response: Optional[ExperimentTrendsQueryResponse] = None + + class FunnelPathsFilter(BaseModel): model_config = ConfigDict( extra="forbid", From 37d6c0737c1d1c569ae3d9b913bf88ebfe631613 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Wed, 5 Feb 2025 13:19:26 -0800 Subject: [PATCH 6/7] Add a skipped test for a complex join --- .../test/test_experiment_query_runner.py | 251 ++++++++++++++++++ 1 file changed, 251 insertions(+) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py index 1c33eac399623..7f304af76238a 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_query_runner.py @@ -1,5 +1,6 @@ from typing import cast from django.test import override_settings +import pytest from posthog.hogql_queries.experiments.experiment_query_runner import ExperimentQueryRunner from posthog.models.cohort.cohort import Cohort from posthog.models.feature_flag.feature_flag import FeatureFlag @@ -176,6 +177,162 @@ def create_data_warehouse_table_with_usage(self): ) return table_name + def create_data_warehouse_table_with_subscriptions(self): + if not OBJECT_STORAGE_ACCESS_KEY_ID or not OBJECT_STORAGE_SECRET_ACCESS_KEY: + raise Exception("Missing vars") + + fs = s3fs.S3FileSystem( + client_kwargs={ + "region_name": "us-east-1", + "endpoint_url": OBJECT_STORAGE_ENDPOINT, + "aws_access_key_id": OBJECT_STORAGE_ACCESS_KEY_ID, + "aws_secret_access_key": OBJECT_STORAGE_SECRET_ACCESS_KEY, + }, + ) + + path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}" + + credential = DataWarehouseCredential.objects.create( + access_key=OBJECT_STORAGE_ACCESS_KEY_ID, + access_secret=OBJECT_STORAGE_SECRET_ACCESS_KEY, + team=self.team, + ) + + subscription_table_data = [ + { + "subscription_id": "1", + "subscription_created_at": datetime(2023, 1, 2), + "subscription_customer_id": "1", + "subscription_amount": 100, + }, + { + "subscription_id": "2", + "subscription_created_at": datetime(2023, 1, 3), + "subscription_customer_id": "2", + "subscription_amount": 50, + }, + { + "subscription_id": "3", + "subscription_created_at": datetime(2023, 1, 4), + "subscription_customer_id": "3", + "subscription_amount": 75, + }, + { + "subscription_id": "4", + "subscription_created_at": datetime(2023, 1, 5), + "subscription_customer_id": "4", + "subscription_amount": 80, + }, + { + "subscription_id": "5", + "subscription_created_at": datetime(2023, 1, 6), + "subscription_customer_id": "5", + "subscription_amount": 90, + }, + ] + + pq.write_to_dataset( + pa.Table.from_pylist(subscription_table_data), + path_to_s3_object, + filesystem=fs, + use_dictionary=True, + compression="snappy", + ) + + subscription_table_name = "subscriptions" + + DataWarehouseTable.objects.create( + name=subscription_table_name, + url_pattern=f"http://host.docker.internal:19000/{OBJECT_STORAGE_BUCKET}/{TEST_BUCKET}/*.parquet", + format=DataWarehouseTable.TableFormat.Parquet, + team=self.team, + columns={ + "subscription_id": "String", + "subscription_created_at": "DateTime64(3, 'UTC')", + "subscription_customer_id": "String", + "subscription_amount": "Int64", + }, + credential=credential, + ) + + customer_table_data = [ + { + "customer_id": "1", + "customer_created_at": datetime(2023, 1, 1), + "customer_name": "John Doe", + "customer_email": "john.doe@example.com", + }, + { + "customer_id": "2", + "customer_created_at": datetime(2023, 1, 2), + "customer_name": "Jane Doe", + "customer_email": "jane.doe@example.com", + }, + { + "customer_id": "3", + "customer_created_at": datetime(2023, 1, 3), + "customer_name": "John Smith", + "customer_email": "john.smith@example.com", + }, + { + "customer_id": "4", + "customer_created_at": datetime(2023, 1, 6), + "customer_name": "Jane Smith", + "customer_email": "jane.smith@example.com", + }, + { + "customer_id": "5", + "customer_created_at": datetime(2023, 1, 7), + "customer_name": "John Doe Jr", + "customer_email": "john.doejr@example.com", + }, + ] + + pq.write_to_dataset( + pa.Table.from_pylist(customer_table_data), + path_to_s3_object, + filesystem=fs, + use_dictionary=True, + compression="snappy", + ) + + customer_table_name = "customers" + + DataWarehouseTable.objects.create( + name=customer_table_name, + url_pattern=f"http://host.docker.internal:19000/{OBJECT_STORAGE_BUCKET}/{TEST_BUCKET}/*.parquet", + format=DataWarehouseTable.TableFormat.Parquet, + team=self.team, + columns={ + "customer_id": "String", + "customer_created_at": "DateTime64(3, 'UTC')", + "customer_name": "String", + "customer_email": "String", + }, + credential=credential, + ) + + DataWarehouseJoin.objects.create( + team=self.team, + source_table_name=subscription_table_name, + source_table_key="subscription_customer_id", + joining_table_name=customer_table_name, + joining_table_key="customer_id", + field_name="subscription_customer", + ) + + DataWarehouseJoin.objects.create( + team=self.team, + source_table_name=subscription_table_name, + source_table_key="subscription_customer.customer_email", + joining_table_name="events", + joining_table_key="person.properties.email", + field_name="events", + configuration={"experiments_optimized": True, "experiments_timestamp_key": "subscription_created_at"}, + ) + + return subscription_table_name + @flaky(max_runs=10, min_passes=1) @freeze_time("2020-01-01T12:00:00Z") @snapshot_clickhouse_queries @@ -889,3 +1046,97 @@ def test_query_runner_with_data_warehouse_internal_filters(self, name, filter: d self.assertEqual(control_result.absolute_exposure, 8) self.assertEqual(test_result.absolute_exposure, 10) + + @pytest.mark.skip(reason="Doesn't work with the new query runner") + def test_query_runner_with_data_warehouse_subscriptions_table(self): + table_name = self.create_data_warehouse_table_with_subscriptions() + + feature_flag = self.create_feature_flag() + experiment = self.create_experiment( + feature_flag=feature_flag, + start_date=datetime(2023, 1, 1), + end_date=datetime(2023, 1, 10), + ) + + feature_flag_property = f"$feature/{feature_flag.key}" + + count_query = TrendsQuery( + series=[ + DataWarehouseNode( + id=table_name, + distinct_id_field="subscription_customer_id", + id_field="id", + table_name=table_name, + timestamp_field="subscription_created_at", + math="total", + ) + ] + ) + + experiment_query = ExperimentQuery( + experiment_id=experiment.id, + kind="ExperimentQuery", + count_query=count_query, + exposure_query=None, + ) + + experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] + experiment.save() + + # Populate exposure events + for variant, count in [("control", 7), ("test", 9)]: + for i in range(count): + _create_event( + team=self.team, + event="$feature_flag_called", + distinct_id=f"user_{variant}_{i}", + properties={ + "$feature_flag_response": variant, + feature_flag_property: variant, + "$feature_flag": feature_flag.key, + }, + timestamp=datetime(2023, 1, i + 1), + ) + + _create_person( + team=self.team, + distinct_ids=["user_control_0"], + properties={"email": "john.doe@example.com"}, + ) + + _create_person( + team=self.team, + distinct_ids=["user_test_1"], + properties={"email": "jane.doe@example.com"}, + ) + + _create_person( + team=self.team, + distinct_ids=["user_test_2"], + properties={"email": "john.smith@example.com"}, + ) + + _create_person( + team=self.team, + distinct_ids=["user_test_3"], + properties={"email": "jane.smith@example.com"}, + ) + + flush_persons_and_events() + + query_runner = ExperimentQueryRunner(query=ExperimentQuery(**experiment.metrics[0]["query"]), team=self.team) + + with freeze_time("2023-01-10"): + result = query_runner.calculate() + + trend_result = cast(ExperimentTrendsQueryResponse, result) + + self.assertEqual(len(result.variants), 2) + + control_result = next(variant for variant in trend_result.variants if variant.key == "control") + test_result = next(variant for variant in trend_result.variants if variant.key == "test") + + self.assertEqual(control_result.count, 1) + self.assertEqual(test_result.count, 3) + self.assertEqual(control_result.absolute_exposure, 7) + self.assertEqual(test_result.absolute_exposure, 9) From 26dd94ec8c155845dcb6479a7910e502b97dbe5a Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 5 Feb 2025 21:42:49 +0000 Subject: [PATCH 7/7] Update query snapshots --- .../test/__snapshots__/test_experiment_query_runner.ambr | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr b/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr index 5aa00d6278085..a4654b0d35d45 100644 --- a/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr +++ b/posthog/hogql_queries/experiments/test/__snapshots__/test_experiment_query_runner.ambr @@ -69,7 +69,7 @@ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) LEFT JOIN (SELECT person.id AS id, - nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email FROM person WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), (SELECT person.id AS id, max(person.version) AS version @@ -102,7 +102,7 @@ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) LEFT JOIN (SELECT person.id AS id, - nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email FROM person WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), (SELECT person.id AS id, max(person.version) AS version @@ -846,7 +846,7 @@ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) LEFT JOIN (SELECT person.id AS id, - nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email FROM person WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), (SELECT person.id AS id, max(person.version) AS version @@ -878,7 +878,7 @@ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) LEFT JOIN (SELECT person.id AS id, - nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email FROM person WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), (SELECT person.id AS id, max(person.version) AS version