Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DC-3789] Add or edit queries for self reported population in the clean and output notebooks #1864

Merged
merged 4 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,16 @@
# -

# ## Person vs Person_Ext in Destination Dataset
# Make sure the destination person and the destination person_ext tables have harmonious data for the five appended columns to the person table.
# Make sure the destination person and the destination person_ext tables have harmonious data for the eight appended columns to the person table.
# 1. sex_at_birth_concept_id
# 2. sex_at_birth_source_concept_id
# 3. sex_at_birth_source_value
# 4. state_of_residence_concept_id
# 5. state_of_residence_source_value <br>
# 5. state_of_residence_source_value
# 6. self_reported_population_concept_id
# 7. self_reported_population_source_value
# 8. self_reported_population_source_concept_id<br>
#
#
# Investigate any failed output.

Expand All @@ -114,6 +118,13 @@
,COUNTIF(p.sex_at_birth_source_value <> pe.sex_at_birth_source_value) as ne_sex_at_birth_source_value
,COUNTIF((p.sex_at_birth_source_value IS NULL AND pe.sex_at_birth_source_value IS NOT NULL)
OR (p.sex_at_birth_source_value IS NOT NULL AND pe.sex_at_birth_source_value IS NULL)) as ne_nulls_sex_at_birth_source_value
-- check self reported population columns --
,COUNTIF((p.self_reported_population_concept_id IS NULL AND pe.self_reported_population_concept_id IS NOT NULL)
OR (p.self_reported_population_concept_id IS NOT NULL AND pe.self_reported_population_concept_id IS NULL)) as ne_nulls_self_reported_population_concept_id
,COUNTIF((p.self_reported_population_source_value IS NULL AND pe.self_reported_population_source_value IS NOT NULL)
OR (p.self_reported_population_source_value IS NOT NULL AND pe.self_reported_population_source_value IS NULL)) as ne_nulls_self_reported_population_source_value
,COUNTIF((p.self_reported_population_source_concept_id IS NULL AND pe.self_reported_population_source_concept_id IS NOT NULL)
OR (p.self_reported_population_source_concept_id IS NOT NULL AND pe.self_reported_population_source_concept_id IS NULL)) as ne_nulls_self_reported_population_source_concept_id
FROM `{{dest_project_id}}.{{dest_dataset_id}}.person` p
JOIN `{{dest_project_id}}.{{dest_dataset_id}}.person_ext` pe
USING (person_id)
Expand Down Expand Up @@ -227,20 +238,56 @@
END AS result
FROM calculation AS c

UNION ALL

SELECT
'nulls_self_reported_population_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
FROM calculation AS c

UNION ALL

SELECT
'self_reported_population_source_value_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_value > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
FROM calculation AS c

UNION ALL

SELECT
'null_self_reported_population_source_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
FROM calculation AS c


''')
query = tpl.render(dest_project_id=dest_project_id,
dest_dataset_id=dest_dataset_id)
execute(client, query)

# ## Person in destination Dataset vs Person_Ext in Source Dataset
# Make sure the destination person and source person_ext tables have harmonious data for the five appended columns to the person table.
# Make sure the destination person and source person_ext tables have harmonious data for the eight appended columns to the person table.
#
# 1. sex_at_birth_concept_id
# 2. sex_at_birth_source_concept_id
# 3. sex_at_birth_source_value
# 4. state_of_residence_concept_id
# 5. state_of_residence_source_value.<br>
# 5. state_of_residence_source_value
# 6. self_reported_population_concept_id
# 7. self_reported_population_source_value
# 8. self_reported_population_source_concept_id<br>
#
# Investigate any failed output.

Expand All @@ -264,6 +311,13 @@
,COUNTIF(p.sex_at_birth_source_value <> pe.sex_at_birth_source_value) AS ne_sex_at_birth_source_value
,COUNTIF((p.sex_at_birth_source_value IS NULL AND pe.sex_at_birth_source_value IS NOT NULL)
OR(p.sex_at_birth_source_value IS NOT NULL AND pe.sex_at_birth_source_value IS NULL)) AS ne_nulls_sex_at_birth_source_value
-- check self reported population columns --
,COUNTIF((p.self_reported_population_concept_id IS NULL AND pe.self_reported_population_concept_id IS NOT NULL)
OR (p.self_reported_population_concept_id IS NOT NULL AND pe.self_reported_population_concept_id IS NULL)) as ne_nulls_self_reported_population_concept_id
,COUNTIF((p.self_reported_population_source_value IS NULL AND pe.self_reported_population_source_value IS NOT NULL)
OR (p.self_reported_population_source_value IS NOT NULL AND pe.self_reported_population_source_value IS NULL)) as ne_nulls_self_reported_population_source_value
,COUNTIF((p.self_reported_population_source_concept_id IS NULL AND pe.self_reported_population_source_concept_id IS NOT NULL)
OR (p.self_reported_population_source_concept_id IS NOT NULL AND pe.self_reported_population_source_concept_id IS NULL)) as ne_nulls_self_reported_population_source_concept_id
FROM `{{dest_project_id}}.{{dest_dataset_id}}.person` p
JOIN `{{src_project_id}}.{{src_dataset_id}}.person_ext` pe
USING(person_id))
Expand Down Expand Up @@ -378,6 +432,39 @@
ELSE 'passed'
END as result
FROM calculation as c

UNION ALL

SELECT
'nulls_self_reported_population_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
FROM calculation AS c

UNION ALL

SELECT
'self_reported_population_source_value_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_value > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
FROM calculation AS c

UNION ALL

SELECT
'null_self_reported_population_source_concept_id_check' AS check
,CASE
WHEN c.ne_nulls_self_reported_population_source_concept_id > 0
THEN 'FAILED'
ELSE 'passed'
END AS result
FROM calculation AS c
''')
query = tpl.render(src_project_id=src_project_id,
dest_project_id=dest_project_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,76 @@
result
# -

# # Query9 "Race Ethnicity: person_ext self reported population DC-3789"
# Verify that the person_ext self_reported_population fields are populated correctly.

# Query 9: validate person_ext.self_reported_population_* against the race
# answers recorded in observation and the race columns on person.
# The SQL returns one row per DISTINCT combination of values that violates
# a rule, so an EMPTY result means the check passed.
# has to be deid_clean
# Raw string literal so the regex escape `\|` reaches BigQuery unchanged
# without relying on Python's handling of unrecognised escape sequences.
query = JINJA_ENV.from_string(r"""
-- One row per person: answers to observation_source_concept_id 1586140
-- (presumably the race/ethnicity question -- confirm) joined with '|'.
-- ARRAY_TO_STRING skips NULL elements, so a '|' in c_races means the person
-- has more than one non-NULL value_as_concept_id.
-- The ordering lives inside ARRAY_AGG because a subquery ORDER BY does not
-- guarantee aggregation order.
WITH obs AS (
    SELECT
        person_id,
        ARRAY_TO_STRING(ARRAY_AGG(CAST(value_source_concept_id AS STRING) ORDER BY value_source_concept_id), '|') AS races,
        ARRAY_TO_STRING(ARRAY_AGG(CAST(value_as_concept_id AS STRING) ORDER BY value_source_concept_id), '|') AS c_races
    FROM `{{project_id}}.{{deid_clean_cdr}}.observation`
    WHERE observation_source_concept_id = 1586140
    GROUP BY person_id
)

SELECT DISTINCT
    races,
    c_races,
    race_source_value,
    ethnicity_source_value,
    race_source_concept_id,
    race_concept_id,
    self_reported_population_source_value,
    self_reported_population_source_concept_id,
    self_reported_population_concept_id
FROM obs
LEFT JOIN `{{project_id}}.{{deid_clean_cdr}}.person`
    USING (person_id)
LEFT JOIN `{{project_id}}.{{deid_clean_cdr}}.person_ext`
    USING (person_id)
WHERE
    -- NOTE(review): every predicate below uses != / NOT IN, so a NULL
    -- self_reported_population_source_value (e.g. no person_ext row) is never
    -- returned. Confirm whether NULL should count as a failure.
    -- check srp column multi pop --
    (REGEXP_CONTAINS(obs.c_races, r'\|')
        AND self_reported_population_source_value != 'WhatRaceEthnicity_GeneralizedMultPopulations')
    -- check srp column single pop not hispanic --
    -- NOTE(review): this branch also requires race_source_value = 'AoUDRC_NoneIndicated',
    -- which makes it a subset of the "single pop hispanic" branch below; confirm the
    -- intended rule for single-population, non-Hispanic participants.
    OR (
        (NOT REGEXP_CONTAINS(obs.c_races, r'\|'))
        AND (
            race_source_value != self_reported_population_source_value
            AND (race_source_value = 'AoUDRC_NoneIndicated'
                 AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic')
        )
    )
    -- check srp column single pop hispanic --
    OR (race_source_value = 'AoUDRC_NoneIndicated'
        AND self_reported_population_source_value != 'WhatRaceEthnicity_Hispanic')
    -- check only expected srpsv exist --
    OR (self_reported_population_source_value NOT IN (
            'WhatRaceEthnicity_GeneralizedMultPopulations',
            'WhatRaceEthnicity_GeneralizedPopulation',
            'WhatRaceEthnicity_Black',
            'WhatRaceEthnicity_White',
            'WhatRaceEthnicity_Asian',
            'WhatRaceEthnicity_Hispanic',
            'PMI_PreferNotToAnswer',
            'PMI_Skip',
            'WhatRaceEthnicity_RaceEthnicityNoneOfThese',
            'WhatRaceEthnicity_AIAN',
            'WhatRaceEthnicity_MENA',
            'WhatRaceEthnicity_NHPI'))
    -- check for expected concept_ids per srpsv --
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedMultPopulations' AND (self_reported_population_concept_id != 2000000008 OR self_reported_population_source_concept_id != 2000000008))
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_GeneralizedPopulation' AND (self_reported_population_concept_id != 2000000001 OR self_reported_population_source_concept_id != 2000000001))
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_Black' AND (self_reported_population_concept_id != 8516 OR self_reported_population_source_concept_id != 1586143))
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_White' AND (self_reported_population_concept_id != 8527 OR self_reported_population_source_concept_id != 1586146))
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_Asian' AND (self_reported_population_concept_id != 8515 OR self_reported_population_source_concept_id != 1586142))
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_Hispanic' AND (self_reported_population_concept_id != 1586147 OR self_reported_population_source_concept_id != 1586147))
    OR (self_reported_population_source_value = 'PMI_PreferNotToAnswer' AND (self_reported_population_concept_id != 1177221 OR self_reported_population_source_concept_id != 903079))
    OR (self_reported_population_source_value = 'PMI_Skip' AND (self_reported_population_concept_id != 903096 OR self_reported_population_source_concept_id != 903096))
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_RaceEthnicityNoneOfThese' AND (self_reported_population_concept_id != 45882607 OR self_reported_population_source_concept_id != 1586148))
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_AIAN' AND (self_reported_population_concept_id != 8657 OR self_reported_population_source_concept_id != 1586141)) -- ct only --
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_MENA' AND (self_reported_population_concept_id != 38003615 OR self_reported_population_source_concept_id != 1586144)) -- ct only --
    OR (self_reported_population_source_value = 'WhatRaceEthnicity_NHPI' AND (self_reported_population_concept_id != 8557 OR self_reported_population_source_concept_id != 1586145)) -- ct only --
ORDER BY races, c_races

""")
q = query.render(project_id=project_id, deid_clean_cdr=deid_clean_cdr)
df1 = execute(client, q)

# The query lists violations, so "no rows" is the passing outcome. The previous
# `df1.eq(0).any().any()` test is False on an empty frame (reporting a clean
# run as a failure) and True whenever any returned cell happens to equal 0.
result = 'PASS' if df1.empty else 'Failure'
summary = summary.append(
    {
        'query':
            'Query 9 Race Ethnicity: person_ext self reported population',
        'result':
            result
    },
    ignore_index=True)
# Display the offending rows (if any) in the notebook output.
df1

# # Summary_row_ICD_suppression


Expand Down