From 096dc671f7326a4be85d541970ce75175d92fc2c Mon Sep 17 00:00:00 2001
From: Kohulan <kohulan.rajan@uni-jena.de>
Date: Mon, 13 Mar 2023 17:17:11 +0100
Subject: [PATCH] feat: update requirements, stop installing dependencies twice
 and add decimer segmentation

---
 Dockerfile                    |  5 ++++-
 app/modules/decimermodules.py | 28 ++++++++++++++++++++++++++++
 app/routers/chem.py           |  6 +++---
 requirements.txt              | 13 +++++++++++--
 4 files changed, 46 insertions(+), 6 deletions(-)
 create mode 100644 app/modules/decimermodules.py

diff --git a/Dockerfile b/Dockerfile
index 40f5ffd..6fd80d9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -33,8 +33,11 @@ WORKDIR /code
 RUN python3 -m pip install -U pip 
 
 COPY ./requirements.txt /code/requirements.txt
-
+RUN pip3 install --upgrade setuptools pip
 RUN pip3 install --no-cache-dir --upgrade -r /code/requirements.txt
+RUN pip3 install --no-deps decimer-segmentation
+RUN pip3 install --no-deps "decimer>=2.2.0"
+RUN pip3 install --no-deps "STOUT-pypi>=2.0.5"
 
 RUN python3 -m pip uninstall -y uvicorn
 
diff --git a/app/modules/decimermodules.py b/app/modules/decimermodules.py
new file mode 100644
index 0000000..ab152be
--- /dev/null
+++ b/app/modules/decimermodules.py
@@ -0,0 +1,28 @@
+import os
+import cv2
+from decimer_segmentation import segment_chemical_structures_from_file
+from DECIMER import predict_SMILES
+
+def getPredictedSegments(path: str):
+    """Segment an image into chemical structures and predict SMILES for each.
+
+    Args:
+        path (str): path of the input image.
+
+    Returns:
+        str: predicted SMILES; results of several segments are joined by '.'.
+    """
+    segments = segment_chemical_structures_from_file(path)
+    if len(segments) == 0:
+        return predict_SMILES(path)  # nothing segmented: use the whole image
+    # Strip the extension, whatever its length, to name the temporary PNGs.
+    image_stem = os.path.splitext(os.path.basename(path))[0]
+    smiles_predicted = []
+    for segment_index, segment in enumerate(segments):
+        segment_path = f"{image_stem}_{segment_index}.png"
+        cv2.imwrite(segment_path, segment)
+        try:
+            smiles_predicted.append(predict_SMILES(segment_path))
+        finally:
+            os.remove(segment_path)  # clean up even when prediction fails
+    return '.'.join(smiles_predicted)
diff --git a/app/routers/chem.py b/app/routers/chem.py
index a496aeb..c223fd0 100644
--- a/app/routers/chem.py
+++ b/app/routers/chem.py
@@ -20,7 +20,7 @@
 from app.modules.classyfire import classify, result
 from app.modules.cdkmodules import getCDKSDGMol
 from app.modules.depict import getRDKitDepiction, getCDKDepiction
-from DECIMER import predict_SMILES
+from app.modules.decimermodules import getPredictedSegments
 
 router = APIRouter(
     prefix="/chem",
@@ -183,7 +183,7 @@ async def extract_chemicalinfo(request: Request):
             response = urlopen(imgDataURI)
             with open(filename, "wb") as f:
                 f.write(response.file.read())
-                smiles = predict_SMILES(filename)
+                smiles = getPredictedSegments(filename)
                 os.remove(filename)
                 return JSONResponse(
                     content={"reference": reference, "smiles": smiles.split(".")}
@@ -193,7 +193,7 @@ async def extract_chemicalinfo(request: Request):
         if response.status_code == 200:
             with open(filename, "wb") as f:
                 f.write(response.content)
-                smiles = predict_SMILES(filename)
+                smiles = getPredictedSegments(filename)
                 os.remove(filename)
                 return JSONResponse(
                     content={"reference": reference, "smiles": smiles.split(".")}
diff --git a/requirements.txt b/requirements.txt
index 0712367..df11670 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,15 @@ uvicorn>=0.15.0,<0.16.0
 psycopg2>=2.7,<2.8
 fastapi-pagination==0.10.0
 rdkit-pypi>=2022.09.4
-STOUT-pypi>=2.0.5
 websockets==10.4
-decimer>=2.2.0
\ No newline at end of file
+pillow
+opencv-python
+matplotlib
+scikit-image
+imantics
+pdf2image
+IPython
+pystow
+unicodedata2
+efficientnet
+tensorflow==2.10.0
\ No newline at end of file