From 2232da2d8ab74b52dcd429ecc6127bbfa21625e1 Mon Sep 17 00:00:00 2001 From: Jazz Mack Smith Date: Fri, 24 Nov 2023 16:42:45 +0000 Subject: [PATCH] Hopeful fix for missing PCA plots --- nPYc/plotting/_multivariatePlotting.py | 106 ++++++++++--------------- nPYc/plotting/_plotting.py | 5 +- nPYc/reports/multivariateReport.py | 24 +++++- 3 files changed, 65 insertions(+), 70 deletions(-) diff --git a/nPYc/plotting/_multivariatePlotting.py b/nPYc/plotting/_multivariatePlotting.py index 519fa5e3..be5b01d3 100644 --- a/nPYc/plotting/_multivariatePlotting.py +++ b/nPYc/plotting/_multivariatePlotting.py @@ -87,39 +87,34 @@ def plotScores(pcaModel, classes=None, colourType=None, :param dict figures: If not ``None``, saves location of each figure for output in html report (see multivariateReport.py) """ - print("Plotting scores %s" % colourType) + #print("----->> Plotting scores %s" % colourType) # Check inputs + if not isinstance(pcaModel, ChemometricsPCA): raise TypeError('PCAmodel must be an instance of ChemometricsPCA') - # Preparation - values = pcaModel.scores - ns, nc = values.shape - - if colourType is not None and colourType not in {'categorical', 'continuous', 'continuousCentered'}: - raise ValueError('colourType must be == ' + str({'categorical', 'continuous', 'continuousCentered'})) - if classes is not None and colourType is None: raise ValueError('If classes is specified, colourType must be') - if classes is None: - classes = pandas.Series('Study Sample' for i in range(ns)) - colourType = 'categorical' + if colourType: + if colourType not in {'categorical', 'continuous', 'continuousCentered'}: + raise ValueError('colourType must be == ' + str({'categorical', 'continuous','continuousCentered'})) - uniq = classes.unique() - try: - uniq.sort() - except: - pass - if colourType == 'categorical': - classes = classes.astype(str) - - # If colourDict check colour defined for every unique entry in class - colourDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=colourDict, dictName="colourDict") - markerDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=markerDict, dictName="markerDict", defaultVal="o") + # If colourDict check colour defined for every unique entry in class + if classes is not None and colourDict is not None: + uniq = classes.unique() + if not all(k in colourDict.keys() for k in uniq): + print("dict keys are %s" % colourDict.keys()) + print("Category keys are %s" % uniq) + raise ValueError( + 'If classes and colourDict are specified every unique entry in class must be a key in colourDict') from matplotlib.patches import Ellipse + # Preparation + values = pcaModel.scores + ns, nc = values.shape + if components is None: components = numpy.ones([nc]).astype(bool) components = numpy.where(components == True) @@ -137,6 +132,19 @@ def plotScores(pcaModel, classes=None, colourType=None, else: plotTitle = '' + if classes is None: + classes = pandas.Series('Sample' for i in range(ns)) + colourType = 'categorical' + + if colourType == 'categorical': + classes = classes.astype(str) + + uniq = classes.unique() + try: + uniq.sort() + except: + pass + # Calculate critical value for Hotelling's T2 # Fval = f.ppf(0.95, 2, ns-2) # Plot scores for each pair of components @@ -175,15 +183,15 @@ def plotScores(pcaModel, classes=None, colourType=None, ax.set_ylim([(ymin + (0.2 * ymin)), ymax + (0.2 * ymax)]) if colourType == 'categorical': - # Plot according to user defined colours if available + if colourDict is not None: + for u in uniq: ax.scatter(values[classes.values == u, components[i]], values[classes.values == u, components[j]], c=colourDict[u], marker=markerDict[u], label=u, alpha=opacity) - else: colors_sns = {} @@ -329,6 +337,7 @@ def plotOutliers(values, runOrder, addViolin=False, sampleType=None, :param str xlabel: Label for the x-axis """ + print("Plotting outliers") # Preparation if isinstance(sampleType, (str, type(None))): sampleType = pandas.Series(['Sample' for i in range(0, len(values))], name='sampleType') @@ -369,47 +378,16 @@ def plotOutliers(values, runOrder, addViolin=False, sampleType=None, sampleMasks = [] palette = {} - print("colourDict %s" % colourDict) - print("markerDict %s" % markerDict) # Plot data coloured by sample type - # TODO: refactor this - if any(sampleType == 'Study Sample'): - x = 'Study Sample' - ax.scatter(runOrder[sampleType.values == x], - values[sampleType.values == x], - c=colourDict[x], - marker=markerDict[x], - label=x, alpha=opacity) - sampleMasks.append((abbrDict[x], sampleType.values == x)) - palette[abbrDict[x]] = colourDict[x] - - if any(sampleType == 'Study Reference'): - x = 'Study Reference' - ax.scatter(runOrder[sampleType.values == x], - values[sampleType.values == x], - c=colourDict[x], - marker=markerDict[x], - label=x, alpha=opacity) - sampleMasks.append((abbrDict[x], sampleType.values == x)) - palette[abbrDict[x]] = colourDict[x] - - if any(sampleType == 'Long-Term Reference'): - x = 'Long-Term Reference' - ax.scatter(runOrder[sampleType.values == x], - values[sampleType.values == x], - c=colourDict[x], - marker=markerDict[x], - label=x, alpha=opacity) - sampleMasks.append((abbrDict[x], sampleType.values == x)) - palette[abbrDict[x]] = colourDict[x] - - if any(sampleType == 'Sample'): - x = 'Sample' - ax.scatter(runOrder[sampleType.values == x], - values[sampleType.values == x], - label=x, alpha=opacity) - sampleMasks.append((abbrDict[x], sampleType.values == x)) - palette[abbrDict[x]] = colourDict[x] + for u in uniq: + sc = ax.scatter(runOrder[sampleType.values == u], + values[sampleType.values == u], + marker=markerDict[u], + c=colourDict[u], + alpha=opacity, + label=u) + sampleMasks.append((abbrDict[u], sampleType.values == u)) + palette[abbrDict[u]] = colourDict[u] xmin, xmax = ax.get_xlim() diff --git a/nPYc/plotting/_plotting.py b/nPYc/plotting/_plotting.py index ab1087fe..0afa7823 100644 --- a/nPYc/plotting/_plotting.py +++ b/nPYc/plotting/_plotting.py @@ -389,9 +389,10 @@ def checkAndSetPlotAttributes(uniqKeys, attribDict, dictName, defaultVal=None): # check all the keys of attribDict are in uniqKeys # putting this here to see if it's a useful refactor. It may not be. if attribDict is not None: + #print("dict keys are %s" % attribDict.keys()) + #print("Category keys are %s" % uniqKeys) if not all(k in attribDict.keys() for k in uniqKeys): - print(dictName + " keys are " + attribDict.keys()) - print("Category keys are " + uniqKeys) + raise ValueError( 'Check keys in ' + dictName + "; some aren't present in the categories list.") else: diff --git a/nPYc/reports/multivariateReport.py b/nPYc/reports/multivariateReport.py index b9348f0e..28aed594 100644 --- a/nPYc/reports/multivariateReport.py +++ b/nPYc/reports/multivariateReport.py @@ -705,8 +705,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio figuresKWscores = _plotScoresLocal(dataForPlotting, fields, pcaModel, - 'categorical', - data.name, + 'categorical', data.name, alpha=hotellings_alpha, plotAssociation=sigKru, kw_threshold=kw_threshold, @@ -716,11 +715,25 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio dpi=data.Attributes['dpi'], figureSize=data.Attributes['figureSize']) + figuresQCscores = plotScores(pcaModel, + classes=data.sampleMetadata['SampleClass'], + colourType='categorical', + colourDict=data.Attributes['sampleTypeColours'], + markerDict=data.Attributes['sampleTypeMarkers'], + title='SampleClass', + figures=figuresQCscores, + hotelling_alpha=hotellings_alpha, + savePath=saveAs, + figureFormat=data.Attributes['figureFormat'], + dpi=data.Attributes['dpi'], + figureSize=data.Attributes['figureSize']) + if destinationPath is not None: for key in figuresKWscores: if os.path.join(destinationPath, 'graphics') in str(figuresKWscores[key]): figuresKWscores[key] = re.sub('.*graphics', 'graphics', figuresKWscores[key]) item['KWscores'] = figuresKWscores + else: if destinationPath is None: print('\n' + item[ @@ -752,6 +765,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio pcaModel, 'categorical', data.name, + alpha=hotellings_alpha, plotAssociation=sigNone, saveDir=saveAs, @@ -791,8 +805,10 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio return None -def _plotScoresLocal(data, metadata, pcaModel, classType, name, alpha=0.05, plotAssociation=None, r_threshold=None, - kw_threshold=None, saveDir=None, figures=None, figureFormat='png', dpi=72, figureSize=(11, 7)): +def _plotScoresLocal(data, metadata, pcaModel, classType, name, + alpha=0.05, plotAssociation=None, r_threshold=None, + kw_threshold=None, saveDir=None, figures=None, + figureFormat='png', dpi=72, figureSize=(11, 7)): """ Local function to plot scores for each metadata field """