From cd3ed3cc05c09922cfa41bb87a423a584e514b8c Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Thu, 5 Jan 2023 10:43:16 +0100 Subject: [PATCH 1/4] add db patch to remove variants from beginning and end of genome --- .../20230104_patch_start_end_mutations.sql | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 scripts/sql/20230104_patch_start_end_mutations.sql diff --git a/scripts/sql/20230104_patch_start_end_mutations.sql b/scripts/sql/20230104_patch_start_end_mutations.sql new file mode 100644 index 00000000..2e6ce948 --- /dev/null +++ b/scripts/sql/20230104_patch_start_end_mutations.sql @@ -0,0 +1,29 @@ + + + +delete from variant_observation_on where position <= 50; +delete from variant_on where position <= 50; +delete from variant_observation_on where position >= 29804; +delete from variant_on where position >= 29804; + +delete from subclonal_variant_observation_on where position <= 50; +delete from subclonal_variant_on where position <= 50; +delete from subclonal_variant_observation_on where position >= 29804; +delete from subclonal_variant_on where position >= 29804; + +delete from low_frequency_variant_observation_on where position <= 50; +delete from low_frequency_variant_on where position <= 50; +delete from low_frequency_variant_observation_on where position >= 29804; +delete from low_frequency_variant_on where position >= 29804; + +delete from lq_clonal_variant_observation_on where position <= 50; +delete from lq_clonal_variant_on where position <= 50; +delete from lq_clonal_variant_observation_on where position >= 29804; +delete from lq_clonal_variant_on where position >= 29804; + +delete from variant_observation_covid19portal_on where position <= 50; +delete from variant_covid19portal_on where position <= 50; +delete from variant_observation_covid19portal_on where position >= 29804; +delete from variant_covid19portal_on where position >= 29804; + + From 5d285a1043b24597970134ddf7ce865cb1de4513 Mon Sep 17 00:00:00 2001 
From: Pablo Riesgo Ferreiro Date: Thu, 5 Jan 2023 10:43:35 +0100 Subject: [PATCH 2/4] add db patch to set the right collection date in the variant observations --- .../sql/20230105_path_collection_date_in_variants.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 scripts/sql/20230105_path_collection_date_in_variants.sql diff --git a/scripts/sql/20230105_path_collection_date_in_variants.sql b/scripts/sql/20230105_path_collection_date_in_variants.sql new file mode 100644 index 00000000..c9e2713e --- /dev/null +++ b/scripts/sql/20230105_path_collection_date_in_variants.sql @@ -0,0 +1,11 @@ + +-- The date stored in the variant observation tables needs to be patched with the collection date from the corresponding sample. + +-- patch ENA dataset +update variant_observation_on set date=s.collection_date from sample_ena_on as s where s.run_accession = variant_observation_on.sample; +update subclonal_variant_observation_on set date=s.collection_date from sample_ena_on as s where s.run_accession = subclonal_variant_observation_on.sample; +update low_frequency_variant_observation_on set date=s.collection_date from sample_ena_on as s where s.run_accession = low_frequency_variant_observation_on.sample; +update lq_clonal_variant_observation_on set date=s.collection_date from sample_ena_on as s where s.run_accession = lq_clonal_variant_observation_on.sample; + +-- patch COVID19 Data Portal dataset +update variant_observation_covid19portal_on set date=s.collection_date from sample_ena_on as s where s.run_accession = variant_observation_covid19portal_on.sample; From 7faf9722b826942638f3d04d23ea7ac6f7307f4d Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Thu, 5 Jan 2023 10:44:06 +0100 Subject: [PATCH 3/4] makes sure that the collection date is stored in variant observations --- covigator/pipeline/vcf_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/covigator/pipeline/vcf_loader.py b/covigator/pipeline/vcf_loader.py index 
89abea2f..be185ced 100644 --- a/covigator/pipeline/vcf_loader.py +++ b/covigator/pipeline/vcf_loader.py @@ -190,7 +190,7 @@ def _parse_variant_observation( gene_name=covigator_variant.gene_name, hgvs_p=covigator_variant.hgvs_p, hgvs_c=covigator_variant.hgvs_c, - date=sample.first_created, + date=sample.collection_date, variant_type=covigator_variant.variant_type, length=self._get_variant_length(variant), reference_amino_acid=covigator_variant.reference_amino_acid, From 3f301d7f86b09f2760163af4828bbdeb95ea5607 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Thu, 5 Jan 2023 12:06:21 +0100 Subject: [PATCH 4/4] fix the patch on dates for COVID19 Data Portal dataset --- scripts/sql/20230105_path_collection_date_in_variants.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sql/20230105_path_collection_date_in_variants.sql b/scripts/sql/20230105_path_collection_date_in_variants.sql index c9e2713e..00e6c833 100644 --- a/scripts/sql/20230105_path_collection_date_in_variants.sql +++ b/scripts/sql/20230105_path_collection_date_in_variants.sql @@ -8,4 +8,4 @@ update low_frequency_variant_observation_on set date=s.collection_date from samp update lq_clonal_variant_observation_on set date=s.collection_date from sample_ena_on as s where s.run_accession = lq_clonal_variant_observation_on.sample; -- patch COVID19 Data Portal dataset -update variant_observation_covid19portal_on set date=s.collection_date from sample_ena_on as s where s.run_accession = variant_observation_covid19portal_on.sample; +update variant_observation_covid19portal_on set date=s.collection_date from sample_covid19_portal_on as s where s.run_accession = variant_observation_covid19portal_on.sample;