From 096dc671f7326a4be85d541970ce75175d92fc2c Mon Sep 17 00:00:00 2001
From: Kohulan
Date: Mon, 13 Mar 2023 17:17:11 +0100
Subject: [PATCH] feat: update requirements, stop installing dependencies twice and added decimer segmentation

---
 Dockerfile                    |  6 +++++-
 app/modules/decimermodules.py | 38 ++++++++++++++++++++++++++++++++++++++
 app/routers/chem.py           |  6 +++---
 requirements.txt              | 13 +++++++++++--
 4 files changed, 57 insertions(+), 6 deletions(-)
 create mode 100644 app/modules/decimermodules.py

diff --git a/Dockerfile b/Dockerfile
index 40f5ffd..6fd80d9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -33,8 +33,12 @@ WORKDIR /code
 RUN python3 -m pip install -U pip
 
 COPY ./requirements.txt /code/requirements.txt
-
+RUN pip3 install --upgrade setuptools pip
 RUN pip3 install --no-cache-dir --upgrade -r /code/requirements.txt
+RUN pip3 install --no-deps decimer-segmentation
+# Quote version specifiers: an unquoted ">" is a shell output redirection.
+RUN pip3 install --no-deps "decimer>=2.2.0"
+RUN pip3 install --no-deps "STOUT-pypi>=2.0.5"
 
 RUN python3 -m pip uninstall -y uvicorn
 
diff --git a/app/modules/decimermodules.py b/app/modules/decimermodules.py
new file mode 100644
index 0000000..ab152be
--- /dev/null
+++ b/app/modules/decimermodules.py
@@ -0,0 +1,38 @@
+import os
+import cv2
+from decimer_segmentation import segment_chemical_structures_from_file
+from DECIMER import predict_SMILES
+
+
+def getPredictedSegments(path: str) -> str:
+    """Predict SMILES for every chemical structure found in an image.
+
+    The image is segmented first. Each segment is written to a temporary
+    PNG file, passed to DECIMER and removed again. If no segment is found,
+    the whole image is passed to DECIMER instead.
+
+    Args:
+        path (str): path of an image.
+
+    Returns:
+        str: the predicted SMILES; multiple predictions are joined by '.'.
+    """
+    segments = segment_chemical_structures_from_file(path)
+    # len() rather than truthiness: segments may be a NumPy array.
+    if len(segments) == 0:
+        return predict_SMILES(path)
+
+    # splitext strips the extension whatever its length (".png", ".jpeg",
+    # ...), so the segment names keep the full stem of the input image.
+    image_stem = os.path.splitext(os.path.basename(path))[0]
+    smiles_predicted = []
+    for segment_index, segment in enumerate(segments):
+        segment_path = f"{image_stem}_{segment_index}.png"
+        cv2.imwrite(segment_path, segment)
+        try:
+            smiles_predicted.append(predict_SMILES(segment_path))
+        finally:
+            # Remove the temporary file even when the prediction fails.
+            if os.path.exists(segment_path):
+                os.remove(segment_path)
+    return ".".join(smiles_predicted)
diff --git a/app/routers/chem.py b/app/routers/chem.py
index a496aeb..c223fd0 100644
--- a/app/routers/chem.py
+++ b/app/routers/chem.py
@@ -20,7 +20,7 @@ from app.modules.classyfire import classify, result
 
 from app.modules.cdkmodules import getCDKSDGMol
 from app.modules.depict import getRDKitDepiction, getCDKDepiction
-from DECIMER import predict_SMILES
+from app.modules.decimermodules import getPredictedSegments
 
 router = APIRouter(
     prefix="/chem",
@@ -183,7 +183,7 @@ async def extract_chemicalinfo(request: Request):
             response = urlopen(imgDataURI)
             with open(filename, "wb") as f:
                 f.write(response.file.read())
-                smiles = predict_SMILES(filename)
+                smiles = getPredictedSegments(filename)
                 os.remove(filename)
                 return JSONResponse(
                     content={"reference": reference, "smiles": smiles.split(".")}
@@ -193,7 +193,7 @@ async def extract_chemicalinfo(request: Request):
         if response.status_code == 200:
             with open(filename, "wb") as f:
                 f.write(response.content)
-                smiles = predict_SMILES(filename)
+                smiles = getPredictedSegments(filename)
                 os.remove(filename)
                 return JSONResponse(
                     content={"reference": reference, "smiles": smiles.split(".")}
diff --git a/requirements.txt b/requirements.txt
index 0712367..df11670 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,15 @@ uvicorn>=0.15.0,<0.16.0
 psycopg2>=2.7,<2.8
 fastapi-pagination==0.10.0
 rdkit-pypi>=2022.09.4
-STOUT-pypi>=2.0.5
 websockets==10.4
-decimer>=2.2.0
\ No newline at end of file
+pillow
+opencv-python
+matplotlib
+scikit-image
+imantics
+pdf2image
+IPython
+pystow
+unicodedata2
+efficientnet
+tensorflow==2.10.0
\ No newline at end of file