From 2232da2d8ab74b52dcd429ecc6127bbfa21625e1 Mon Sep 17 00:00:00 2001
From: Jazz Mack Smith <j.mack-smith@imperial.ac.uk>
Date: Fri, 24 Nov 2023 16:42:45 +0000
Subject: [PATCH] Hopeful fix for missing PCA plots

---
 nPYc/plotting/_multivariatePlotting.py | 106 ++++++++++---------------
 nPYc/plotting/_plotting.py             |   5 +-
 nPYc/reports/multivariateReport.py     |  24 +++++-
 3 files changed, 65 insertions(+), 70 deletions(-)

diff --git a/nPYc/plotting/_multivariatePlotting.py b/nPYc/plotting/_multivariatePlotting.py
index 519fa5e3..be5b01d3 100644
--- a/nPYc/plotting/_multivariatePlotting.py
+++ b/nPYc/plotting/_multivariatePlotting.py
@@ -87,39 +87,34 @@ def plotScores(pcaModel, classes=None, colourType=None,
 	:param dict figures: If not ``None``, saves location of each figure for output in html report (see multivariateReport.py)
 	"""
 
-    print("Plotting scores %s" % colourType)
+    #print("----->> Plotting scores %s" % colourType)
     # Check inputs
+
     if not isinstance(pcaModel, ChemometricsPCA):
         raise TypeError('PCAmodel must be an instance of ChemometricsPCA')
 
-    # Preparation
-    values = pcaModel.scores
-    ns, nc = values.shape
-
-    if colourType is not None and colourType not in {'categorical', 'continuous', 'continuousCentered'}:
-        raise ValueError('colourType must be == ' + str({'categorical', 'continuous', 'continuousCentered'}))
-
     if classes is not None and colourType is None:
         raise ValueError('If classes is specified, colourType must be')
 
-    if classes is None:
-        classes = pandas.Series('Study Sample' for i in range(ns))
-        colourType = 'categorical'
+    if colourType:
+        if colourType not in {'categorical', 'continuous', 'continuousCentered'}:
+            raise ValueError('colourType must be == ' + str({'categorical', 'continuous','continuousCentered'}))
 
-    uniq = classes.unique()
-    try:
-        uniq.sort()
-    except:
-        pass
-    if colourType == 'categorical':
-        classes = classes.astype(str)
-
-    # If colourDict check colour defined for every unique entry in class
-    colourDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=colourDict, dictName="colourDict")
-    markerDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=markerDict, dictName="markerDict", defaultVal="o")
+    # If colourDict is supplied, check that a colour is defined for every unique entry in classes
+    if classes is not None and colourDict is not None:
+        uniq = classes.unique()
+        if not all(k in colourDict.keys() for k in uniq):
+            print("dict keys are %s" % colourDict.keys())
+            print("Category keys are %s" % uniq)
+            raise ValueError(
+                'If classes and colourDict are specified every unique entry in class must be a key in colourDict')
 
     from matplotlib.patches import Ellipse
 
+    # Preparation
+    values = pcaModel.scores
+    ns, nc = values.shape
+
     if components is None:
         components = numpy.ones([nc]).astype(bool)
     components = numpy.where(components == True)
@@ -137,6 +132,19 @@ def plotScores(pcaModel, classes=None, colourType=None,
     else:
         plotTitle = ''
 
+    if classes is None:
+        classes = pandas.Series('Sample' for i in range(ns))
+        colourType = 'categorical'
+
+    if colourType == 'categorical':
+        classes = classes.astype(str)
+
+    uniq = classes.unique()
+    try:
+        uniq.sort()
+    except:
+        pass
+
     # Calculate critical value for Hotelling's T2
     # Fval = f.ppf(0.95, 2, ns-2)
     # Plot scores for each pair of components
@@ -175,15 +183,15 @@ def plotScores(pcaModel, classes=None, colourType=None,
         ax.set_ylim([(ymin + (0.2 * ymin)), ymax + (0.2 * ymax)])
 
         if colourType == 'categorical':
-
             # Plot according to user defined colours if available
+
             if colourDict is not None:
+
                 for u in uniq:
                     ax.scatter(values[classes.values == u, components[i]],
                                values[classes.values == u, components[j]],
                                c=colourDict[u], marker=markerDict[u],
                                label=u, alpha=opacity)
-
             else:
                 colors_sns = {}
 
@@ -329,6 +337,7 @@ def plotOutliers(values, runOrder, addViolin=False, sampleType=None,
 	:param str xlabel: Label for the x-axis
 	"""
 
+    print("Plotting outliers")
     # Preparation
     if isinstance(sampleType, (str, type(None))):
         sampleType = pandas.Series(['Sample' for i in range(0, len(values))], name='sampleType')
@@ -369,47 +378,16 @@ def plotOutliers(values, runOrder, addViolin=False, sampleType=None,
     sampleMasks = []
     palette = {}
 
-    print("colourDict %s" % colourDict)
-    print("markerDict %s" % markerDict)
     # Plot data coloured by sample type
-    # TODO: refactor this
-    if any(sampleType == 'Study Sample'):
-        x = 'Study Sample'
-        ax.scatter(runOrder[sampleType.values == x],
-                   values[sampleType.values == x],
-                   c=colourDict[x],
-                   marker=markerDict[x],
-                   label=x, alpha=opacity)
-        sampleMasks.append((abbrDict[x], sampleType.values == x))
-        palette[abbrDict[x]] = colourDict[x]
-
-    if any(sampleType == 'Study Reference'):
-        x = 'Study Reference'
-        ax.scatter(runOrder[sampleType.values == x],
-                   values[sampleType.values == x],
-                   c=colourDict[x],
-                   marker=markerDict[x],
-                   label=x, alpha=opacity)
-        sampleMasks.append((abbrDict[x], sampleType.values == x))
-        palette[abbrDict[x]] = colourDict[x]
-
-    if any(sampleType == 'Long-Term Reference'):
-        x = 'Long-Term Reference'
-        ax.scatter(runOrder[sampleType.values == x],
-                   values[sampleType.values == x],
-                   c=colourDict[x],
-                   marker=markerDict[x],
-                   label=x, alpha=opacity)
-        sampleMasks.append((abbrDict[x], sampleType.values == x))
-        palette[abbrDict[x]] = colourDict[x]
-
-    if any(sampleType == 'Sample'):
-        x = 'Sample'
-        ax.scatter(runOrder[sampleType.values == x],
-                   values[sampleType.values == x],
-                   label=x, alpha=opacity)
-        sampleMasks.append((abbrDict[x], sampleType.values == x))
-        palette[abbrDict[x]] = colourDict[x]
+    for u in uniq:
+        sc = ax.scatter(runOrder[sampleType.values == u],
+                        values[sampleType.values == u],
+                        marker=markerDict[u],
+                        c=colourDict[u],
+                        alpha=opacity,
+                        label=u)
+        sampleMasks.append((abbrDict[u], sampleType.values == u))
+        palette[abbrDict[u]] = colourDict[u]
 
     xmin, xmax = ax.get_xlim()
 
diff --git a/nPYc/plotting/_plotting.py b/nPYc/plotting/_plotting.py
index ab1087fe..0afa7823 100644
--- a/nPYc/plotting/_plotting.py
+++ b/nPYc/plotting/_plotting.py
@@ -389,9 +389,10 @@ def checkAndSetPlotAttributes(uniqKeys, attribDict, dictName, defaultVal=None):
     # check all the keys of attribDict are in uniqKeys
     # putting this here to see if it's a useful refactor. It may not be.
     if attribDict is not None:
+        #print("dict keys are %s" % attribDict.keys())
+        #print("Category keys are %s" % uniqKeys)
         if not all(k in attribDict.keys() for k in uniqKeys):
-            print(dictName + " keys are " + attribDict.keys())
-            print("Category keys are " + uniqKeys)
+
             raise ValueError(
                 'Check keys in ' + dictName + "; some aren't present in the categories list.")
     else:
diff --git a/nPYc/reports/multivariateReport.py b/nPYc/reports/multivariateReport.py
index b9348f0e..28aed594 100644
--- a/nPYc/reports/multivariateReport.py
+++ b/nPYc/reports/multivariateReport.py
@@ -705,8 +705,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
             figuresKWscores = _plotScoresLocal(dataForPlotting,
                                                fields,
                                                pcaModel,
-                                               'categorical',
-                                               data.name,
+                                               'categorical', data.name,
                                                alpha=hotellings_alpha,
                                                plotAssociation=sigKru,
                                                kw_threshold=kw_threshold,
@@ -716,11 +715,25 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
                                                dpi=data.Attributes['dpi'],
                                                figureSize=data.Attributes['figureSize'])
 
+            figuresQCscores = plotScores(pcaModel,
+                                         classes=data.sampleMetadata['SampleClass'],
+                                         colourType='categorical',
+                                         colourDict=data.Attributes['sampleTypeColours'],
+                                         markerDict=data.Attributes['sampleTypeMarkers'],
+                                         title='SampleClass',
+                                         figures=figuresQCscores,
+                                         hotelling_alpha=hotellings_alpha,
+                                         savePath=saveAs,
+                                         figureFormat=data.Attributes['figureFormat'],
+                                         dpi=data.Attributes['dpi'],
+                                         figureSize=data.Attributes['figureSize'])
+
             if destinationPath is not None:
                 for key in figuresKWscores:
                     if os.path.join(destinationPath, 'graphics') in str(figuresKWscores[key]):
                         figuresKWscores[key] = re.sub('.*graphics', 'graphics', figuresKWscores[key])
             item['KWscores'] = figuresKWscores
+
     else:
         if destinationPath is None:
             print('\n' + item[
@@ -752,6 +765,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
                                               pcaModel,
                                               'categorical',
                                               data.name,
+
                                               alpha=hotellings_alpha,
                                               plotAssociation=sigNone,
                                               saveDir=saveAs,
@@ -791,8 +805,10 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
     return None
 
 
-def _plotScoresLocal(data, metadata, pcaModel, classType, name, alpha=0.05, plotAssociation=None, r_threshold=None,
-                     kw_threshold=None, saveDir=None, figures=None, figureFormat='png', dpi=72, figureSize=(11, 7)):
+def _plotScoresLocal(data, metadata, pcaModel, classType, name,
+                     alpha=0.05, plotAssociation=None, r_threshold=None,
+                     kw_threshold=None, saveDir=None, figures=None,
+                     figureFormat='png', dpi=72, figureSize=(11, 7)):
     """
 	Local function to plot scores for each metadata field
 	"""