Skip to content

Commit f9ddddc

Browse files
Pydantic integration with the BaseESModel class
1 parent b9ada0f commit f9ddddc

24 files changed

+41529
-1
lines changed

elasticsearch/dsl/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
IndexTemplate,
9191
)
9292
from .mapping import AsyncMapping, Mapping
93+
from .pydantic import BaseESModel
9394
from .query import Q, Query
9495
from .response import AggResponse, Response, UpdateByQueryResponse
9596
from .search import (
@@ -120,6 +121,7 @@
120121
"AsyncUpdateByQuery",
121122
"AttrDict",
122123
"AttrList",
124+
"BaseESModel",
123125
"Binary",
124126
"Boolean",
125127
"Byte",

elasticsearch/dsl/pydantic.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Licensed to Elasticsearch B.V. under one or more contributor
2+
# license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright
4+
# ownership. Elasticsearch B.V. licenses this file to you under
5+
# the Apache License, Version 2.0 (the "License"); you may
6+
# not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from typing import Any, ClassVar, Dict, Self, Tuple, Type
19+
20+
from pydantic import BaseModel, Field, PrivateAttr
21+
from typing_extensions import Annotated, dataclass_transform
22+
23+
from elasticsearch import dsl
24+
25+
class _BaseModel(BaseModel):
    """Internal Pydantic base model carrying Elasticsearch document metadata.

    The ``meta`` field holds per-document metadata (presumably keys such as
    ``id`` and ``index`` — confirm against ``to_doc``/``from_doc`` usage) and
    is annotated with ``dsl.mapped_field(exclude=True)`` so the DSL layer
    excludes it from the regular document fields.
    """

    # default_factory=dict is pydantic's documented idiom for mutable
    # defaults; it avoids relying on pydantic's implicit deep-copy of a
    # shared {} literal (behavior is equivalent: each instance gets a
    # fresh empty dict).
    meta: Annotated[Dict[str, Any], dsl.mapped_field(exclude=True)] = Field(
        default_factory=dict
    )
28+
29+
class BaseESModelMetaclass(type(BaseModel)):  # type: ignore[misc]
    """Metaclass that pairs each Pydantic model with a companion DSL document.

    For every class created through this metaclass, a parallel
    ``AsyncDocument`` subclass named ``_ES<name>`` is generated from the same
    class attributes and stored on the model as ``_doc``, enabling
    Pydantic <-> Elasticsearch DSL conversions (see ``BaseESModel``).
    """

    def __new__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]) -> Any:
        # Build the Pydantic model class first, through pydantic's own
        # metaclass machinery (this class derives from type(BaseModel)).
        model = super().__new__(cls, name, bases, attrs)
        # Collect AsyncDocument's non-dunder attributes so the generated
        # document class keeps the DSL document behavior.
        dsl_attrs = {
            attr: value
            for attr, value in dsl.AsyncDocument.__dict__.items()
            if not attr.startswith("__")
        }
        # Create the companion document class with the same metaclass as
        # AsyncDocument. Merge order matters: dsl_attrs is spread after
        # attrs, so on a name clash AsyncDocument's attribute wins over
        # the user-declared one.
        model._doc = type(dsl.AsyncDocument)(  # type: ignore[misc]
            f"_ES{name}",
            dsl.AsyncDocument.__bases__,
            {**attrs, **dsl_attrs, "__qualname__": f"_ES{name}"},
        )
        return model
44+
45+
46+
@dataclass_transform(kw_only_default=True, field_specifiers=(Field, PrivateAttr))
47+
class BaseESModel(_BaseModel, metaclass=BaseESModelMetaclass):
48+
_doc: ClassVar[Type[dsl.AsyncDocument]]
49+
50+
def to_doc(self) -> dsl.AsyncDocument:
51+
data = self.model_dump()
52+
meta = {f"_{k}": v for k, v in data.pop("meta", {}).items()}
53+
return self._doc(**meta, **data)
54+
55+
@classmethod
56+
def from_doc(cls, dsl_obj: dsl.AsyncDocument) -> Self:
57+
return cls(meta=dsl_obj.meta.to_dict(), **dsl_obj.to_dict())

elasticsearch/dsl/response/aggs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def _wrap_bucket(self, data: Dict[str, Any]) -> Bucket[_R]:
6363
)
6464

6565
def __iter__(self) -> Iterator["Agg"]:  # type: ignore[override]
    # Iterate over the individual buckets of this aggregation response.
    # The arg-type ignore silences the mismatch between the static type
    # of self.buckets and the declared Agg iterator element type.
    return iter(self.buckets)  # type: ignore[arg-type]
6767

6868
def __len__(self) -> int:
    # Number of buckets in this aggregation response.
    return len(self.buckets)

examples/quotes/.gitignore

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Logs
2+
logs
3+
*.log
4+
npm-debug.log*
5+
yarn-debug.log*
6+
yarn-error.log*
7+
pnpm-debug.log*
8+
lerna-debug.log*
9+
10+
node_modules
11+
dist
12+
dist-ssr
13+
*.local
14+
15+
# Editor directories and files
16+
.vscode/*
17+
!.vscode/extensions.json
18+
.idea
19+
.DS_Store
20+
*.suo
21+
*.ntvs*
22+
*.njsproj
23+
*.sln
24+
*.sw?

examples/quotes/README.md

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Quotes
2+
Quotes database example, which demonstrates the Elasticsearch integration with
3+
Pydantic models. This example features a React frontend and a FastAPI back end.
4+
5+
![Quotes app screenshot](screenshot.png)
6+
7+
## What is this?
8+
9+
This repository contains a small application that demonstrates how easy it is
10+
to set up a full-text and vector database using [Elasticsearch](https://www.elastic.co/elasticsearch),
11+
while defining the data model with [Pydantic](https://docs.pydantic.dev/latest/).
12+
13+
The application includes a FastAPI back end and a React front end. The example
14+
ingests a dataset of famous quotes into an Elasticsearch index, and for each quote
15+
it generates an embedding using the
16+
[all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
17+
Sentence Transformers model.
18+
19+
The dataset used by this application has about 37,000 famous quotes, each with
20+
their author and tags. The data originates from a
21+
[Kaggle dataset](https://www.kaggle.com/datasets/akmittal/quotes-dataset) that
22+
appears to have been generated from quotes that were scraped from the Goodreads
23+
[popular quotes](https://www.goodreads.com/quotes) page.
24+
25+
## Requirements
26+
27+
Please make sure you have the following installed:
28+
29+
- Python 3.10 or newer
30+
- Node.js 18 or newer
31+
32+
## How To Run
33+
34+
Follow these steps to install this demo on your computer:
35+
36+
### Clone this repository
37+
38+
Run the following command to install a copy of this project on your computer:
39+
40+
```bash
41+
git clone https://github.com/elastic/elasticsearch-py
42+
cd examples/quotes
43+
```
44+
45+
### Install the Node and Python dependencies
46+
47+
Run the following command to set up the JavaScript and Python environment and
48+
install all the dependencies:
49+
50+
```bash
51+
npm install
52+
```
53+
54+
### Start a development Elasticsearch container
55+
56+
You can use [start-local](https://www.elastic.co/docs/deploy-manage/deploy/self-managed/local-development-installation-quickstart)
57+
to start a small Elasticsearch instance.
58+
59+
Use this command to launch the instance (Docker and Docker Compose are required):
60+
61+
```bash
62+
curl -fsSL https://elastic.co/start-local | sh
63+
```
64+
65+
### Create the quotes database
66+
67+
Run this command in your terminal:
68+
69+
```bash
70+
npm run ingest
71+
```
72+
73+
This task may take a few minutes. How long it takes depends on your computer
74+
speed and whether you have a GPU, which is used to generate the embeddings if
75+
available.
76+
77+
### Start the back end
78+
79+
Run this command in your terminal:
80+
81+
```bash
82+
npm run backend
83+
```
84+
85+
### Start the front end
86+
87+
Open a second terminal window and run this command:
88+
89+
```bash
90+
npm run dev
91+
```
92+
93+
You can now navigate to `http://localhost:5173` on your web browser to access
94+
the application.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[project]
2+
name = "quotes"
3+
version = "0.1"
4+
dependencies = [
5+
"elasticsearch[async]>=8,<9",
6+
"fastapi",
7+
"orjson",
8+
"sentence-transformers",
9+
"uvicorn",
10+
]
11+
12+
[project.optional-dependencies]
13+
dev = [
14+
]
15+
16+
[build-system]
17+
requires = ["hatchling"]
18+
build-backend = "hatchling.build"

0 commit comments

Comments
 (0)