Skip to content

Commit

Permalink
fix: move rsId and concordance check outside the filter function
Browse files Browse the repository at this point in the history
Window functions cannot be applied inside where clauses.
  • Loading branch information
ireneisdoomed committed Apr 6, 2023
1 parent 6cc9af1 commit c57919d
Showing 1 changed file with 30 additions and 19 deletions.
49 changes: 30 additions & 19 deletions src/otg/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,29 +595,40 @@ def _map_to_variant_annotation_variants(

# Semi-resolved ids (still contain duplicates when no conclusion could be reached
# based on rsIds or allele concordance)
filtered_associations = gwas_associations_subset.join(
f.broadcast(va_subset),
on=["chromosome", "position"],
how="left",
).filter(
# Filter out rows where the GWAS Catalog rsId does not match the GnomAD rsId,
# but there is a corresponding variant for the same association
StudyLocusGWASCatalog._flag_mappings_to_retain(
f.col("studyLocusId"),
StudyLocusGWASCatalog._compare_rsids(
f.col("rsIdsGnomad"), f.col("rsIdsGwasCatalog")
filtered_associations = (
gwas_associations_subset.join(
f.broadcast(va_subset),
on=["chromosome", "position"],
how="left",
)
.withColumn(
"rsIdFilter",
StudyLocusGWASCatalog._flag_mappings_to_retain(
f.col("studyLocusId"),
StudyLocusGWASCatalog._compare_rsids(
f.col("rsIdsGnomad"), f.col("rsIdsGwasCatalog")
),
),
)
# or filter out rows where GWAS Catalog alleles are not concordant with GnomAD alleles,
# but there is a corresponding variant for the same association
| StudyLocusGWASCatalog._flag_mappings_to_retain(
f.col("studyLocusId"),
StudyLocusGWASCatalog._check_concordance(
f.col("riskAllele"),
f.col("referenceAllele"),
f.col("alternateAllele"),
.withColumn(
"concordanceFilter",
StudyLocusGWASCatalog._flag_mappings_to_retain(
f.col("studyLocusId"),
StudyLocusGWASCatalog._check_concordance(
f.col("riskAllele"),
f.col("referenceAllele"),
f.col("alternateAllele"),
),
),
)
.filter(
# Filter out rows where the GWAS Catalog rsId does not match the GnomAD rsId,
# but there is a corresponding variant for the same association
f.col("rsIdFilter")
# or filter out rows where GWAS Catalog alleles are not concordant with GnomAD alleles,
# but there is a corresponding variant for the same association
| f.col("concordanceFilter")
)
)

# Keep only highest maxMaf variant per studyLocusId
Expand Down

0 comments on commit c57919d

Please sign in to comment.