From 50999acb179e5df8239e7b0b8097439e4a249e18 Mon Sep 17 00:00:00 2001 From: smlmbrt Date: Tue, 23 Aug 2022 16:53:01 +0100 Subject: [PATCH] Minor description edits --- pgscatalog_utils/download/download_scorefile.py | 4 ++++ pgscatalog_utils/match/match_variants.py | 8 ++++---- pgscatalog_utils/scorefile/combine_scorefiles.py | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pgscatalog_utils/download/download_scorefile.py b/pgscatalog_utils/download/download_scorefile.py index 0bd3298..30f8ac8 100644 --- a/pgscatalog_utils/download/download_scorefile.py +++ b/pgscatalog_utils/download/download_scorefile.py @@ -88,6 +88,10 @@ def _description_text() -> str: The PGS Catalog API is queried to get a list of scoring file URLs. Scoring files are downloaded via FTP to a specified directory. PGS Catalog scoring files are staged with the name: + + {PGS_ID}.txt.gz + + If a valid build is specified, harmonized files are downloaded as: {PGS_ID}_hmPOS_{genome_build}.txt.gz diff --git a/pgscatalog_utils/match/match_variants.py b/pgscatalog_utils/match/match_variants.py index 55d7a56..0d31da6 100644 --- a/pgscatalog_utils/match/match_variants.py +++ b/pgscatalog_utils/match/match_variants.py @@ -123,12 +123,12 @@ def _description_text() -> str: makes matching faster and simpler. Target genomes can be in plink1 bim format or plink2 pvar - format. Variant IDs should be unique. + format. Variant IDs should be unique so that they can be specified + in the scoring file as: variant_id|effect_allele|[effect_weight column(s)...] Only one set of target genomes should be matched at a time. Don't - try to match target genomes from different plink - filesets. Matching against a set of chromosomes from the same - fileset is OK (see --split). + try to match target genomes from different plink filesets. Matching + against a set of chromosomes from the same fileset is OK (see --split). 
''') diff --git a/pgscatalog_utils/scorefile/combine_scorefiles.py b/pgscatalog_utils/scorefile/combine_scorefiles.py index c9e80f4..35d9b85 100644 --- a/pgscatalog_utils/scorefile/combine_scorefiles.py +++ b/pgscatalog_utils/scorefile/combine_scorefiles.py @@ -43,7 +43,8 @@ def _read_and_melt(path, drop_missing: bool = False): def _description_text() -> str: return textwrap.dedent('''\ - Combine multiple scoring files in PGS Catalog format to a 'long' + Combine multiple scoring files in PGS Catalog format (see + https://www.pgscatalog.org/downloads/ for details) to a 'long' table, and optionally liftover genomic coordinates to GRCh37 or GRCh38. Custom scorefiles in PGS Catalog format can be combined with PGS Catalog scoring files. The program can accept a mix of