Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev' into opt_chrom
Browse files: browse the repository at this point in the history
  • Loading branch information
smlmbrt committed Nov 21, 2022
2 parents c43e85d + 9741d51 commit 01a92ac
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions pgscatalog_utils/match/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,17 @@ def write_scorefiles(matches: pl.LazyFrame, split: bool, dataset: str):
dominant: pl.LazyFrame
recessive: pl.LazyFrame

# collect and cache minimum required columns
min_cols: list[str] = ['accession', 'effect_type', 'chr_name', 'ID', 'matched_effect_allele', 'effect_weight']
matches: pl.LazyFrame = (matches.select(min_cols)
.collect()
.lazy())

if split:
chroms: list[str] = matches.select("chr_name").unique().collect().get_column("chr_name").to_list()
for chrom in chroms:
# 1. filter by chromosome & collect to cache!
chrom_df: pl.LazyFrame = matches.filter(pl.col('chr_name') == chrom).collect().lazy()
# 1. filter by chromosome
chrom_df: pl.LazyFrame = matches.filter(pl.col('chr_name') == chrom)
# 2. split by effect type
additive, dominant, recessive = _split_effect_type(chrom_df)

Expand All @@ -44,9 +50,6 @@ def write_scorefiles(matches: pl.LazyFrame, split: bool, dataset: str):
# 4. pivot and write!
_write_split(deduped, chrom, dataset)
else:
# collect to cache!
matches: pl.LazyFrame = matches.collect().lazy()

# 1. split by effect type
additive, dominant, recessive = _split_effect_type(matches)

Expand Down

0 comments on commit 01a92ac

Please sign in to comment.