From 286fa35263c54fa362211609fa428a8f2f0b08b4 Mon Sep 17 00:00:00 2001 From: Jean-Pascal MILCENT Date: Thu, 2 May 2024 12:22:56 +0200 Subject: [PATCH] feat: improve taxons search (unaccent, better display) Resolve #532. --- atlas/atlasAPI.py | 2 +- atlas/modeles/entities/vmSearchTaxon.py | 1 + .../repositories/vmSearchTaxonRepository.py | 26 +++--- data/atlas/5.atlas.vm_search_taxon.sql | 84 ++++++++++++------- data/update/update_1.6.1to1.6.2.sql | 69 +++++++++++++++ docs/changelog.rst | 18 ++++ 6 files changed, 158 insertions(+), 42 deletions(-) diff --git a/atlas/atlasAPI.py b/atlas/atlasAPI.py index de661edce..ad4952423 100644 --- a/atlas/atlasAPI.py +++ b/atlas/atlasAPI.py @@ -20,7 +20,7 @@ def searchTaxonAPI(): session = db.session search = request.args.get("search", "") limit = request.args.get("limit", 50) - results = vmSearchTaxonRepository.listeTaxonsSearch(session, search, limit) + results = vmSearchTaxonRepository.searchTaxons(session, search, limit) session.close() return jsonify(results) diff --git a/atlas/modeles/entities/vmSearchTaxon.py b/atlas/modeles/entities/vmSearchTaxon.py index a6e4c3813..3fb9b2b9e 100644 --- a/atlas/modeles/entities/vmSearchTaxon.py +++ b/atlas/modeles/entities/vmSearchTaxon.py @@ -14,6 +14,7 @@ class VmSearchTaxon(Base): metadata, Column("cd_ref", Integer, primary_key=True, unique=True), Column("cd_nom", Integer), + Column("display_name", String), Column("search_name", String), schema="atlas", autoload=True, diff --git a/atlas/modeles/repositories/vmSearchTaxonRepository.py b/atlas/modeles/repositories/vmSearchTaxonRepository.py index 95c138153..c9798caac 100644 --- a/atlas/modeles/repositories/vmSearchTaxonRepository.py +++ b/atlas/modeles/repositories/vmSearchTaxonRepository.py @@ -19,7 +19,7 @@ def listeTaxons(session): return taxonList -def listeTaxonsSearch(session, search, limit=50): +def searchTaxons(session, search, limit=50): """ Recherche dans la VmSearchTaxon en ilike Utilisé pour l'autocomplétion de la recherche de taxon @@ -34,20 +34,20 @@ def listeTaxonsSearch(session, search, limit=50): label = search_name value = cd_ref """ - - req = session.query( - VmSearchTaxon.search_name, - VmSearchTaxon.cd_ref, - func.similarity(VmSearchTaxon.search_name, search).label("idx_trgm"), - ).distinct() - - search = search.replace(" ", "%") - req = ( - req.filter(VmSearchTaxon.search_name.ilike("%" + search + "%")) + like_search = "%" + search.replace(" ", "%") + "%" + + query = ( + session.query( + VmSearchTaxon.display_name, + VmSearchTaxon.cd_ref, + func.similarity(VmSearchTaxon.search_name, search).label("idx_trgm"), + ) + .distinct() + .filter(func.unaccent(VmSearchTaxon.search_name).ilike(func.unaccent(like_search))) .order_by(desc("idx_trgm")) .order_by(VmSearchTaxon.cd_ref == VmSearchTaxon.cd_nom) .limit(limit) ) - data = req.all() + results = query.all() - return [{"label": d[0], "value": d[1]} for d in data] + return [{"label": r[0], "value": r[1]} for r in results] diff --git a/data/atlas/5.atlas.vm_search_taxon.sql b/data/atlas/5.atlas.vm_search_taxon.sql index 083783573..68bbfa7b4 100644 --- a/data/atlas/5.atlas.vm_search_taxon.sql +++ b/data/atlas/5.atlas.vm_search_taxon.sql @@ -1,34 +1,62 @@ --- Taxons observés et de tous leurs synonymes (utilisés pour la recherche d'une espèce) +CREATE MATERIALIZED VIEW atlas.vm_search_taxon AS + WITH verna_names AS ( + SELECT DISTINCT + cd_nom, + lb_nom, + cd_ref, + STRING_TO_TABLE(nom_vern, ', ') AS nom_vern + FROM atlas.vm_taxref + WHERE nom_vern IS NOT NULL + AND cd_nom = cd_ref + AND nom_vern <> lb_nom + ), + names AS ( + -- Chosen scinames + SELECT + cd_nom, + cd_ref, + lb_nom AS search_name, + CONCAT('', REPLACE(nom_complet_html, lb_auteur, ''), ' ', lb_auteur) AS display_name + FROM atlas.vm_taxref + WHERE cd_nom = cd_ref -CREATE MATERIALIZED VIEW atlas.vm_search_taxon AS -SELECT row_number() OVER (ORDER BY t.cd_nom,t.cd_ref,t.search_name)::integer AS fid, - t.cd_nom, - t.cd_ref, - t.search_name, - t.nom_valide, - t.lb_nom -FROM ( - SELECT t_1.cd_nom, - t_1.cd_ref, - concat(t_1.lb_nom, ' = ', t_1.nom_valide, '') AS search_name, - t_1.nom_valide, - t_1.lb_nom - FROM atlas.vm_taxref t_1 + UNION - UNION - SELECT t_1.cd_nom, - t_1.cd_ref, - concat(t_1.nom_vern, ' = ', t_1.nom_valide, '' ) AS search_name, - t_1.nom_valide, - t_1.lb_nom - FROM atlas.vm_taxref t_1 - WHERE t_1.nom_vern IS NOT NULL AND t_1.cd_nom = t_1.cd_ref -) t -JOIN atlas.vm_taxons taxons ON taxons.cd_ref = t.cd_ref; + -- Synonym scinames + SELECT + t1.cd_nom, + t1.cd_ref, + t1.lb_nom AS search_name, + CONCAT(REPLACE(t1.nom_complet_html, t1.lb_auteur, ''), ' = ', REPLACE(t2.nom_complet_html, t2.lb_auteur, ''), ' ', t2.lb_auteur) AS display_name + FROM atlas.vm_taxref AS t1 + JOIN atlas.vm_taxref AS t2 + ON t1.cd_ref = t2.cd_nom + WHERE t1.cd_nom <> t1.cd_ref + + UNION + + -- Vernacular names + SELECT + v.cd_nom, + v.cd_ref, + v.nom_vern AS search_name, + CONCAT(v.nom_vern, ' = ', REPLACE(t.nom_complet_html, t.lb_auteur, ''), ' ', t.lb_auteur) AS display_name + FROM verna_names AS v + JOIN atlas.vm_taxref AS t + ON t.cd_nom = v.cd_ref + WHERE v.nom_vern <> v.lb_nom + ) + SELECT ROW_NUMBER() OVER (ORDER BY n.cd_nom, n.cd_ref, n.search_name)::integer AS fid, + n.cd_nom, + n.cd_ref, + n.search_name, + n.display_name + FROM atlas.vm_taxons AS t + JOIN names AS n + ON t.cd_ref = n.cd_ref ; CREATE UNIQUE INDEX ON atlas.vm_search_taxon(fid); CREATE INDEX ON atlas.vm_search_taxon(cd_nom); -create INDEX ON atlas.vm_search_taxon(cd_ref); - +CREATE INDEX ON atlas.vm_search_taxon(cd_ref); CREATE INDEX trgm_idx ON atlas.vm_search_taxon USING GIST (search_name gist_trgm_ops); -CREATE UNIQUE INDEX ON atlas.vm_search_taxon (cd_nom, search_name); \ No newline at end of file +CREATE UNIQUE INDEX ON atlas.vm_search_taxon (cd_nom, search_name); diff --git a/data/update/update_1.6.1to1.6.2.sql b/data/update/update_1.6.1to1.6.2.sql index 8841c9526..03c3eba9a 100644 --- a/data/update/update_1.6.1to1.6.2.sql +++ b/data/update/update_1.6.1to1.6.2.sql @@ -43,3 +43,72 @@ CREATE INDEX ON atlas.vm_observations_mailles CREATE INDEX ON atlas.vm_observations_mailles USING btree (id_maille, cd_ref); + + +-- ISSUE #531 & #532 +CREATE EXTENSION IF NOT EXISTS unaccent SCHEMA "public"; + +-- ISSUE #532 +DROP MATERIALIZED VIEW IF EXISTS atlas.vm_search_taxon ; +CREATE MATERIALIZED VIEW atlas.vm_search_taxon AS + WITH verna_names AS ( + SELECT DISTINCT + cd_nom, + lb_nom, + cd_ref, + STRING_TO_TABLE(nom_vern, ', ') AS nom_vern + FROM atlas.vm_taxref + WHERE nom_vern IS NOT NULL + AND cd_nom = cd_ref + AND nom_vern <> lb_nom + ), + names AS ( + -- Chosen scinames + SELECT + cd_nom, + cd_ref, + lb_nom AS search_name, + CONCAT('', REPLACE(nom_complet_html, lb_auteur, ''), ' ', lb_auteur) AS display_name + FROM atlas.vm_taxref + WHERE cd_nom = cd_ref + + UNION + + -- Synonym scinames + SELECT + t1.cd_nom, + t1.cd_ref, + t1.lb_nom AS search_name, + CONCAT(REPLACE(t1.nom_complet_html, t1.lb_auteur, ''), ' = ', REPLACE(t2.nom_complet_html, t2.lb_auteur, ''), ' ', t2.lb_auteur) AS display_name + FROM atlas.vm_taxref AS t1 + JOIN atlas.vm_taxref AS t2 + ON t1.cd_ref = t2.cd_nom + WHERE t1.cd_nom <> t1.cd_ref + + UNION + + -- Vernacular names + SELECT + v.cd_nom, + v.cd_ref, + v.nom_vern AS search_name, + CONCAT(v.nom_vern, ' = ', REPLACE(t.nom_complet_html, t.lb_auteur, ''), ' ', t.lb_auteur) AS display_name + FROM verna_names AS v + JOIN atlas.vm_taxref AS t + ON t.cd_nom = v.cd_ref + WHERE v.nom_vern <> v.lb_nom + ) + SELECT ROW_NUMBER() OVER (ORDER BY n.cd_nom, n.cd_ref, n.search_name)::integer AS fid, + n.cd_nom, + n.cd_ref, + n.search_name, + n.display_name + FROM atlas.vm_taxons AS t + JOIN names AS n + ON t.cd_ref = n.cd_ref ; + +CREATE UNIQUE INDEX ON atlas.vm_search_taxon(fid); +CREATE INDEX ON atlas.vm_search_taxon(cd_nom); +CREATE INDEX ON atlas.vm_search_taxon(cd_ref); +CREATE INDEX trgm_idx ON atlas.vm_search_taxon USING GIST (search_name gist_trgm_ops); +CREATE UNIQUE INDEX ON atlas.vm_search_taxon (cd_nom, search_name); diff --git a/docs/changelog.rst b/docs/changelog.rst index 2709dccf9..ac61947d7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,6 +2,24 @@ CHANGELOG ========= +[Unreleased] +------------------ + +🐛 **Optimisations** + +- Amélioration de la "recherche par espèces" (#532 par @jpm-cbna) + + +⚠️ **Notes de version** + +Si vous mettez à jour GeoNature-atlas : + +- Exécutez le script SQL de mise à jour de la BDD : https://github.com/PnX-SI/GeoNature-atlas/blob/master/data/update/update_1.6.1to1.6.2.sql +- Donner les droits à l'utilisateur en lecture seule de l'Atlas (habituellement geonatatlas) : + :: + GRANT SELECT ON TABLE atlas.vm_search_taxon TO geonatatlas; + + 1.6.1 (2023-10-16) ------------------