feat: update requirements, stop installing dependencies twice and add…

…ed decimer segmentation
Steinbeck-Lab · Mar 13, 2023 · 096dc67 · 096dc67
1 parent d484b9d
commit 096dc67
Show file tree

Hide file tree

Showing 4 changed files with 46 additions and 6 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -33,8 +33,11 @@ WORKDIR /code
 RUN python3 -m pip install -U pip 
 
 COPY ./requirements.txt /code/requirements.txt
-
+RUN pip3 install --upgrade setuptools pip
 RUN pip3 install --no-cache-dir --upgrade -r /code/requirements.txt
+# Version specifiers are quoted: RUN uses the shell form, where an unquoted
+# ">" is an output redirection and the version constraint would be dropped.
+RUN pip3 install --no-deps decimer-segmentation
+RUN pip3 install --no-deps "decimer>=2.2.0"
+RUN pip3 install --no-deps "STOUT-pypi>=2.0.5"
 
 RUN python3 -m pip uninstall -y uvicorn
 

diff --git a/app/modules/decimermodules.py b/app/modules/decimermodules.py
@@ -0,0 +1,28 @@
+import os
+import cv2
+from decimer_segmentation import segment_chemical_structures_from_file
+from DECIMER import predict_SMILES
+
+def getPredictedSegments(path:str):
+    """Takes an image filepath and returns a set of paths of segmented images
+    Args:
+        input_path (str): path of an image
+    
+    Returns:
+        segment_paths (list): a list of paths of segmented images. 
+    """
+    smiles_predicted = []
+    image_name = os.path.split(path)[1]
+    segments = segment_chemical_structures_from_file(path)
+    if len(segments) == 0:
+        smiles = predict_SMILES(path)
+        return smiles
+    else:
+        for segment_index in range(len(segments)):
+            segmentname = f"{image_name[:-5]}_{segment_index}.png"
+            segment_path = os.path.join(segmentname)
+            cv2.imwrite(segment_path, segments[segment_index])
+            smiles = predict_SMILES(segment_path)
+            smiles_predicted.append(smiles)
+            os.remove(segment_path)
+        return '.'.join(smiles_predicted)
diff --git a/app/routers/chem.py b/app/routers/chem.py
@@ -20,7 +20,7 @@
 from app.modules.classyfire import classify, result
 from app.modules.cdkmodules import getCDKSDGMol
 from app.modules.depict import getRDKitDepiction, getCDKDepiction
-from DECIMER import predict_SMILES
+from app.modules.decimermodules import getPredictedSegments
 
 router = APIRouter(
     prefix="/chem",
@@ -183,7 +183,7 @@ async def extract_chemicalinfo(request: Request):
             response = urlopen(imgDataURI)
             with open(filename, "wb") as f:
                 f.write(response.file.read())
-                smiles = predict_SMILES(filename)
+                smiles = getPredictedSegments(filename)
                 os.remove(filename)
                 return JSONResponse(
                     content={"reference": reference, "smiles": smiles.split(".")}
@@ -193,7 +193,7 @@ async def extract_chemicalinfo(request: Request):
         if response.status_code == 200:
             with open(filename, "wb") as f:
                 f.write(response.content)
-                smiles = predict_SMILES(filename)
+                smiles = getPredictedSegments(filename)
                 os.remove(filename)
                 return JSONResponse(
                     content={"reference": reference, "smiles": smiles.split(".")}

diff --git a/requirements.txt b/requirements.txt
@@ -4,6 +4,15 @@ uvicorn>=0.15.0,<0.16.0
 psycopg2>=2.7,<2.8
 fastapi-pagination==0.10.0
 rdkit-pypi>=2022.09.4
-STOUT-pypi>=2.0.5
 websockets==10.4
-decimer>=2.2.0
+pillow
+opencv-python
+matplotlib
+scikit-image
+imantics
+pdf2image
+IPython
+pystow
+unicodedata2
+efficientnet
+tensorflow==2.10.0