From e11455b174522fe97c282ecac50b57aa99413826 Mon Sep 17 00:00:00 2001
From: Nobuaki Karasawa
Date: Wed, 25 Sep 2024 09:04:09 +0900
Subject: [PATCH] fix!: fix protein ins unknown repeat notation

---
Reviewer notes (this section is not part of the commit message):

* ProteinInsertion formatting: when every element of `alts` is "Xaa", the
  inserted part is now emitted as `X[<count>]` instead of the bare count.
* Alts parsing: the `\d+` clause is replaced by an `X\[\d+\]` clause, so the
  bare-count spelling (e.g. `Arg78_Gly79ins23`) no longer has a matching
  clause; `protein-insertion-re` additionally admits `[` and `]` after `ins`.
* NOTE(review): the leading whitespace of the hunk lines below was
  reconstructed following Clojure indentation conventions. Confirm it against
  the target tree, or apply with `--ignore-whitespace`.

 src/clj_hgvs/mutation.cljc       | 12 ++++++++----
 test/clj_hgvs/mutation_test.cljc |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/clj_hgvs/mutation.cljc b/src/clj_hgvs/mutation.cljc
index adf6222..7041543 100644
--- a/src/clj_hgvs/mutation.cljc
+++ b/src/clj_hgvs/mutation.cljc
@@ -1744,7 +1744,7 @@
 ;;; Protein - insertion
 ;;;
 ;;; e.g. Lys23_Leu24insArgSerGln
-;;;      Arg78_Gly79ins23
+;;;      Arg78_Gly79insX[23]
 
 (defrecord ProteinInsertion [ref-start coord-start ref-end coord-end alts]
   Mutation
@@ -1759,7 +1759,7 @@
                          (coord/format coord-end)
                          "ins"
                          (if (every? #(= % "Xaa") alts)
-                           (count alts)
+                           (str "X[" (count alts) "]")
                            (cond->> alts
                              (= amino-acid-format :short) (map ->short-amino-acid)))])))
   (plain [this]
@@ -1790,10 +1790,14 @@
   [s]
   (condp re-matches s
     #"([A-Z*]([a-z]{2})?)+" (mapv ->long-amino-acid (re-seq #"[A-Z*](?:[a-z]{2})?" s))
-    #"\d+" (vec (repeat (intl/parse-long s) "Xaa"))))
+    #"X\[\d+\]" (-> (re-find #"X\[(\d+)\]" s)
+                    second
+                    intl/parse-long
+                    (repeat "Xaa")
+                    vec)))
 
 (def ^:private protein-insertion-re
-  #"([A-Z](?:[a-z]{2})?)(\d+)_([A-Z](?:[a-z]{2})?)(\d+)ins([\da-zA-Z*]+)")
+  #"([A-Z](?:[a-z]{2})?)(\d+)_([A-Z](?:[a-z]{2})?)(\d+)ins([\da-zA-Z*\[\]]+)")
 
 (defn parse-protein-insertion
   [s]
diff --git a/test/clj_hgvs/mutation_test.cljc b/test/clj_hgvs/mutation_test.cljc
index d83482f..aa1e52d 100644
--- a/test/clj_hgvs/mutation_test.cljc
+++ b/test/clj_hgvs/mutation_test.cljc
@@ -1327,7 +1327,7 @@
                                                "Leu" (coord/protein-coordinate 24)
                                                ["Arg" "Ser" "Ter"]))
 
-(def protein-insertion3s "Arg78_Gly79ins5")
+(def protein-insertion3s "Arg78_Gly79insX[5]")
 (def protein-insertion3 (mut/protein-insertion "Arg" (coord/protein-coordinate 78)
                                                "Gly" (coord/protein-coordinate 79)
                                                ["Xaa" "Xaa" "Xaa" "Xaa" "Xaa"]))