Skip to content

Commit

Permalink
feat: better search
Browse files Browse the repository at this point in the history
Signed-off-by: Maud Royer <hello@maudroyer.fr>
  • Loading branch information
jillro committed Sep 6, 2024
1 parent 925984e commit 57a3856
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 73 deletions.
24 changes: 19 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,27 @@ de la base de données publique des médicaments.
Celles-ci sont transmises sous la forme d'un dump
`.sql` et d'un dossier contenant les images.

La base MySQL doit être restaurée depuis le dump.
Ces données sont stockées par MySQL, et doivent être restaurées
depuis le dump transmis par l'ANSM. La base de données MySQL
ne doit pas être modifiée, et doit rester un simple clone
de la base de données publique des médicaments.
### Données spécifiques à l'application

Les images sont stockées dans la base de données
PostgreSQL, de l'application et peuvent
être chargée avec et doivent être copiées
avec [le module `seed` de Kysely](https://sillon.incubateur.net/docs/database-for-everything/file-storage/).
Info Médicament utilise une base de données PostgreSQL
pour stocker les données spécifiques à l'application :
* les images des notices (pour éviter d'avoir à les stocker dans un système de fichiers)
* les index de recherche plein texte

Vous devez d'abord jouer les migrations pour créer les tables,
puis charger les données. La base MySQL doit être accessible préalablement.
```bash
# Créer les tables
kysely migrate:latest
# Charger les images et les index de recherche
# Le chemin vers le dossier contenant les images des notices doit être spécifié
# avec la variable d'environnement LEAFLET_IMAGES
LEAFLET_IMAGES=/path/to/folder kysely seed run
```
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"start": "next start",
"lint": "next lint",
"predev": "only-include-used-icons",
"prebuild": "only-include-used-icons"
"prebuild": "only-include-used-icons && kysely migrate:latest && kysely seed run --specific 1725439927263_searchIndex"
},
"dependencies": {
"@codegouvfr/react-dsfr": "^1.9.22",
Expand Down
232 changes: 170 additions & 62 deletions src/app/rechercher/page.tsx
Original file line number Diff line number Diff line change
@@ -1,41 +1,148 @@
import Link from "next/link";
import { sql } from "kysely";
import Button from "@codegouvfr/react-dsfr/Button";
import Input from "@codegouvfr/react-dsfr/Input";
import { pdbmMySQL, Specialite, SubstanceNom } from "@/db/pdbmMySQL";
import { fr } from "@codegouvfr/react-dsfr";
import Badge from "@codegouvfr/react-dsfr/Badge";
import db, { SearchResult } from "@/db";

import { formatSpecName, groupSpecialites } from "@/displayUtils";
import liste_CIS_MVP from "@/liste_CIS_MVP.json";

async function getResults(query: string) {
const specialites: Specialite[] = (
await pdbmMySQL
.selectFrom("Specialite")
.where("SpecDenom01", "like", `%${query}%`)
.selectAll()
.execute()
).filter((specialite) => liste_CIS_MVP.includes(specialite.SpecId));
/**
 * One entry of the search result list: either a substance, or a named group
 * of specialities that are displayed together.
 */
type SearchResultItem =
| SubstanceNom
| { groupName: string; specialites: Specialite[] };

const substances: SubstanceNom[] = await pdbmMySQL
.selectFrom("Subs_Nom")
.where(({ eb, selectFrom }) =>
eb(
"NomId",
"in",
selectFrom("Composant")
.select("NomId")
.where("SpecId", "in", liste_CIS_MVP),
),
)
.where("NomLib", "like", `%${query}%`)
/**
 * Fetch the specialities to display for a set of search-index matches.
 *
 * A speciality is returned when it belongs to the MVP CIS list and either its
 * own id is in `specialitesId` or one of its components is in `substancesId`.
 * Because of the left join on `Composant`, a speciality yields one row per
 * component, each row carrying that component's `NomId`.
 *
 * @param specialitesId ids of the specialities matched directly
 * @param substancesId ids of the matched substances
 * @returns the matching rows, or an empty array when both id lists are empty
 */
async function getSpecialites(specialitesId: string[], substancesId: string[]) {
  // Only skip the query when there is nothing at all to look up: a search that
  // matches substances only must still return the specialities containing them.
  if (specialitesId.length === 0 && substancesId.length === 0) return [];

  return await pdbmMySQL
    .selectFrom("Specialite")
    .leftJoin("Composant", "Specialite.SpecId", "Composant.SpecId")
    .where(({ eb }) =>
      eb.or([
        // Each `in` condition is added only when its list is non-empty.
        // NOTE(review): an empty list is expected to render as `in ()`, which
        // MySQL rejects — confirm against the Kysely version in use.
        ...(specialitesId.length > 0
          ? [eb("Specialite.SpecId", "in", specialitesId)]
          : []),
        ...(substancesId.length > 0
          ? [eb("Composant.NomId", "in", substancesId)]
          : []),
      ]),
    )
    .where("Specialite.SpecId", "in", liste_CIS_MVP)
    .selectAll("Specialite")
    .select("NomId")
    .execute();
}

/**
 * Load the substances whose id is in `substancesId`, keeping only those that
 * are a component of at least one speciality of the MVP CIS list.
 *
 * @param substancesId ids of the substances to load
 * @returns the matching substances; empty when `substancesId` is empty
 */
async function getSubstances(substancesId: string[]) {
  // Nothing to look up: do not query the database at all.
  if (!substancesId.length) {
    const none: SubstanceNom[] = [];
    return none;
  }

  const rows: SubstanceNom[] = await pdbmMySQL
    .selectFrom("Subs_Nom")
    .where("NomId", "in", substancesId)
    .where(({ eb, selectFrom }) => {
      // Sub-select of the substance ids used by the specialities of the list.
      const usedInMvpList = selectFrom("Composant")
        .select("NomId")
        .where("SpecId", "in", liste_CIS_MVP);
      return eb("NomId", "in", usedInMvpList);
    })
    .selectAll()
    .execute();

  return rows;
}

/**
* Get search results from the database
*
* The search results are generated and ordered by the following rules:
* 1. We get all substances and specialites matches from the search_index table
* 2. We retrieve all substances, all direct match for specialities,
* and all specialities that have a match with a substance
* 3. We group the specialities by their group name
* 4. The score of each result is the word similarity between the search query and the token,
* for specialities, we sum direct match score and substance match score
*/
async function getResults(query: string): Promise<SearchResultItem[]> {
const dbQuery = db
.selectFrom("search_index")
.selectAll()
.execute();
.select(({ fn, val }) => [
fn("word_similarity", [val(query), "token"]).as("sml"),
])
.where("token", sql`%>`, query)
.orderBy("sml", "desc")
.orderBy(({ fn }) => fn("length", ["token"]));

const matches = (await dbQuery.execute()) as (SearchResult & {
sml: number;
})[];

if (matches.length === 0) return [];

const specialitesId = matches
.filter((r) => r.table_name === "Specialite")
.map((r) => r.id);
const substancesId = matches
.filter((r) => r.table_name === "Subs_Nom")
.map((r) => r.id);

return {
specialites,
substances,
};
const specialites = await getSpecialites(specialitesId, substancesId);
const specialiteGroups = Array.from(groupSpecialites(specialites).entries());
const substances = await getSubstances(substancesId);

type ScoredItem = { score: number; item: SearchResultItem };
type SpecialiteGroup = (typeof specialiteGroups)[number];
type MatchKind = "direct" | "substance";

// Ids are compared after trimming, on both sides.
const sameId = (a: string, b: string): boolean => a.trim() === b.trim();

// Scored results in discovery order; sorted by score just before returning.
const scored: ScoredItem[] = [];
// For every group already in `scored`: its entry and which kinds of match
// have already contributed to its score.
const groupEntries = new Map<
  string,
  { entry: ScoredItem; counted: Set<MatchKind> }
>();

// Add a group to the results once, then accumulate its score: the first
// direct match and the first substance match each count exactly once.
// `matches` is ordered by similarity (descending), so the first one of each
// kind is also the best one.
const addGroupScore = (
  [groupName, specialites]: SpecialiteGroup,
  kind: MatchKind,
  score: number,
): void => {
  const known = groupEntries.get(groupName);
  if (!known) {
    const entry: ScoredItem = { score, item: { groupName, specialites } };
    scored.push(entry);
    groupEntries.set(groupName, { entry, counted: new Set([kind]) });
  } else if (!known.counted.has(kind)) {
    known.entry.score += score;
    known.counted.add(kind);
  }
};

for (const match of matches) {
  if (match.table_name === "Subs_Nom") {
    const substance = substances.find((s) => sameId(s.NomId, match.id));
    // if undefined, the substance is not in one of the 500 CIS list
    if (!substance) continue;

    // NOTE(review): substances are not de-duplicated; if the index holds
    // several tokens for one substance it is listed once per matching token.
    scored.push({ score: match.sml, item: substance });

    // Every group holding a speciality that contains this substance gets
    // the substance score.
    for (const group of specialiteGroups) {
      const containsSubstance = group[1].some(
        (s) => !!s.NomId && sameId(s.NomId, substance.NomId),
      );
      if (containsSubstance) addGroupScore(group, "substance", match.sml);
    }
  } else if (match.table_name === "Specialite") {
    const group = specialiteGroups.find(([, specialites]) =>
      specialites.some((s) => sameId(s.SpecId, match.id)),
    ); // if undefined, the specialite is not in the 500 CIS list
    if (group) addGroupScore(group, "direct", match.sml);
  }
}

return scored.sort((a, b) => b.score - a.score).map(({ item }) => item);
}

export default async function Page({
Expand Down Expand Up @@ -70,46 +177,47 @@ export default async function Page({
</div>
{results && (
<>
<p>
{results.substances.length + results.specialites.length} RÉSULTATS
</p>
<p>{results.length} RÉSULTATS</p>
<ul>
{results.substances.map((substance: SubstanceNom) => (
<li key={substance.NomId} className={"fr-mb-2w"}>
<Link href={`/substance/${substance.NomId}`}>
<b>{formatSpecName(substance.NomLib)}</b>
</Link>
<Badge
className={fr.cx("fr-ml-2w", "fr-badge--purple-glycine")}
>
Substance
</Badge>
{results.map((result, index) => (
<li key={index} className={"fr-mb-2w"}>
{"NomLib" in result ? (
<>
<Link href={`/substance/${result.NomId}`}>
<b>{formatSpecName(result.NomLib)}</b>
</Link>
<Badge
className={fr.cx("fr-ml-2w", "fr-badge--purple-glycine")}
>
Substance
</Badge>
</>
) : (
<>
<b>{formatSpecName(result.groupName)}</b>
<Badge
className={fr.cx("fr-ml-2w", "fr-badge--green-emeraude")}
>
Médicament
</Badge>
<ul>
{result.specialites?.map((specialite) => (
<li key={specialite.SpecId}>
<Link href={`/medicament/${specialite.SpecId}`}>
{formatSpecName(specialite.SpecDenom01)
.replace(
`${formatSpecName(result.groupName)}, `,
"",
)
.replace(formatSpecName(result.groupName), "")}
</Link>
</li>
))}
</ul>
</>
)}
</li>
))}
{Array.from(groupSpecialites(results.specialites).entries()).map(
([groupName, specialites]: [string, Specialite[]]) => (
<li key={groupName} className={"fr-mb-2w"}>
<b>{formatSpecName(groupName)}</b>
<Badge
className={fr.cx("fr-ml-2w", "fr-badge--green-emeraude")}
>
Médicament
</Badge>
<ul>
{specialites?.map((specialite) => (
<li key={specialite.SpecId}>
<Link href={`/medicament/${specialite.SpecId}`}>
{formatSpecName(specialite.SpecDenom01).replace(
formatSpecName(groupName),
"",
)}
</Link>
</li>
))}
</ul>
</li>
),
)}
</ul>
</>
)}
Expand Down
8 changes: 8 additions & 0 deletions src/db/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@ import { Kysely, NoResultError, PostgresDialect, Selectable } from "kysely";
import { Pool } from "pg";

/** Tables of the application database, keyed by table name, as typed for Kysely. */
interface Database {
search_index: SearchIndexTable;
leaflet_images: LeafletImagesTable;
}

/** Row of the `search_index` table: one searchable token pointing to one record. */
interface SearchIndexTable {
// Text the search query is compared against.
token: string;
// Name of the table holding the record that `id` refers to.
table_name: "Specialite" | "Subs_Nom";
// Identifier of the record in the table named by `table_name`.
id: string;
}

interface LeafletImagesTable {
path: string;
image: Buffer;
Expand Down Expand Up @@ -34,6 +41,7 @@ export const getLeafletImage = async ({ src }: { src: string }) => {
};

export type LeafletImage = Selectable<LeafletImagesTable>;
export type SearchResult = Selectable<SearchIndexTable>;

const db = new Kysely<Database>({
dialect: new PostgresDialect({
Expand Down
26 changes: 26 additions & 0 deletions src/db/migrations/20240904085535_searchIndex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { Kysely, sql } from "kysely";

/**
 * Create the `search_index` table and its trigram index.
 *
 * @param db Kysely instance the migration runs against
 */
export async function up(db: Kysely<any>): Promise<void> {
  // The extensions are installed first: the index below relies on the
  // `gin_trgm_ops` operator class, which pg_trgm provides.
  // NOTE(review): `unaccent` is not used in this migration — presumably
  // needed when the index is populated; confirm against the seed script.
  await sql`CREATE EXTENSION IF NOT EXISTS pg_trgm`.execute(db);
  await sql`CREATE EXTENSION IF NOT EXISTS unaccent`.execute(db);

  const searchIndexTable = db.schema
    .createTable("search_index")
    .addColumn("token", "text")
    .addColumn("table_name", "text")
    .addColumn("id", "text");
  await searchIndexTable.execute();

  // The column and operator class are passed through `using`, as there is
  // no separate column list on this index definition.
  const trigramIndex = db.schema
    .createIndex("search_index_trgm")
    .on("search_index")
    .using("GIN (token gin_trgm_ops)");
  await trigramIndex.execute();
}

/**
 * Revert the migration: drop the `search_index` table, then the extensions.
 *
 * @param db Kysely instance the migration runs against
 */
export async function down(db: Kysely<any>): Promise<void> {
  // The table goes first, before the extensions are dropped.
  const dropSearchIndexTable = db.schema.dropTable("search_index");
  await dropSearchIndexTable.execute();

  const dropUnaccent = sql`DROP EXTENSION IF EXISTS unaccent`;
  const dropTrigram = sql`DROP EXTENSION IF EXISTS pg_trgm`;
  await dropUnaccent.execute(db);
  await dropTrigram.execute(db);
}
Loading

0 comments on commit 57a3856

Please sign in to comment.