Skip to content

Commit

Permalink
Hopeful fix for missing PCA plots
Browse files Browse the repository at this point in the history
  • Loading branch information
Jazz Mack Smith committed Nov 24, 2023
1 parent 8e4fb6b commit 2232da2
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 70 deletions.
106 changes: 42 additions & 64 deletions nPYc/plotting/_multivariatePlotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,39 +87,34 @@ def plotScores(pcaModel, classes=None, colourType=None,
:param dict figures: If not ``None``, saves location of each figure for output in html report (see multivariateReport.py)
"""

print("Plotting scores %s" % colourType)
#print("----->> Plotting scores %s" % colourType)
# Check inputs

if not isinstance(pcaModel, ChemometricsPCA):
raise TypeError('PCAmodel must be an instance of ChemometricsPCA')

# Preparation
values = pcaModel.scores
ns, nc = values.shape

if colourType is not None and colourType not in {'categorical', 'continuous', 'continuousCentered'}:
raise ValueError('colourType must be == ' + str({'categorical', 'continuous', 'continuousCentered'}))

if classes is not None and colourType is None:
raise ValueError('If classes is specified, colourType must be')

if classes is None:
classes = pandas.Series('Study Sample' for i in range(ns))
colourType = 'categorical'
if colourType:
if colourType not in {'categorical', 'continuous', 'continuousCentered'}:
raise ValueError('colourType must be == ' + str({'categorical', 'continuous','continuousCentered'}))

uniq = classes.unique()
try:
uniq.sort()
except:
pass
if colourType == 'categorical':
classes = classes.astype(str)

# If colourDict check colour defined for every unique entry in class
colourDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=colourDict, dictName="colourDict")
markerDict = checkAndSetPlotAttributes(uniqKeys=uniq, attribDict=markerDict, dictName="markerDict", defaultVal="o")
# If colourDict check colour defined for every unique entry in class
if classes is not None and colourDict is not None:
uniq = classes.unique()
if not all(k in colourDict.keys() for k in uniq):
print("dict keys are %s" % colourDict.keys())
print("Category keys are %s" % uniq)
raise ValueError(
'If classes and colourDict are specified every unique entry in class must be a key in colourDict')

from matplotlib.patches import Ellipse

# Preparation
values = pcaModel.scores
ns, nc = values.shape

if components is None:
components = numpy.ones([nc]).astype(bool)
components = numpy.where(components == True)
Expand All @@ -137,6 +132,19 @@ def plotScores(pcaModel, classes=None, colourType=None,
else:
plotTitle = ''

if classes is None:
classes = pandas.Series('Sample' for i in range(ns))
colourType = 'categorical'

if colourType == 'categorical':
classes = classes.astype(str)

uniq = classes.unique()
try:
uniq.sort()
except:
pass

# Calculate critical value for Hotelling's T2
# Fval = f.ppf(0.95, 2, ns-2)
# Plot scores for each pair of components
Expand Down Expand Up @@ -175,15 +183,15 @@ def plotScores(pcaModel, classes=None, colourType=None,
ax.set_ylim([(ymin + (0.2 * ymin)), ymax + (0.2 * ymax)])

if colourType == 'categorical':

# Plot according to user defined colours if available

if colourDict is not None:

for u in uniq:
ax.scatter(values[classes.values == u, components[i]],
values[classes.values == u, components[j]],
c=colourDict[u], marker=markerDict[u],
label=u, alpha=opacity)

else:
colors_sns = {}

Expand Down Expand Up @@ -329,6 +337,7 @@ def plotOutliers(values, runOrder, addViolin=False, sampleType=None,
:param str xlabel: Label for the x-axis
"""

print("Plotting outliers")
# Preparation
if isinstance(sampleType, (str, type(None))):
sampleType = pandas.Series(['Sample' for i in range(0, len(values))], name='sampleType')
Expand Down Expand Up @@ -369,47 +378,16 @@ def plotOutliers(values, runOrder, addViolin=False, sampleType=None,
sampleMasks = []
palette = {}

print("colourDict %s" % colourDict)
print("markerDict %s" % markerDict)
# Plot data coloured by sample type
# TODO: refactor this
if any(sampleType == 'Study Sample'):
x = 'Study Sample'
ax.scatter(runOrder[sampleType.values == x],
values[sampleType.values == x],
c=colourDict[x],
marker=markerDict[x],
label=x, alpha=opacity)
sampleMasks.append((abbrDict[x], sampleType.values == x))
palette[abbrDict[x]] = colourDict[x]

if any(sampleType == 'Study Reference'):
x = 'Study Reference'
ax.scatter(runOrder[sampleType.values == x],
values[sampleType.values == x],
c=colourDict[x],
marker=markerDict[x],
label=x, alpha=opacity)
sampleMasks.append((abbrDict[x], sampleType.values == x))
palette[abbrDict[x]] = colourDict[x]

if any(sampleType == 'Long-Term Reference'):
x = 'Long-Term Reference'
ax.scatter(runOrder[sampleType.values == x],
values[sampleType.values == x],
c=colourDict[x],
marker=markerDict[x],
label=x, alpha=opacity)
sampleMasks.append((abbrDict[x], sampleType.values == x))
palette[abbrDict[x]] = colourDict[x]

if any(sampleType == 'Sample'):
x = 'Sample'
ax.scatter(runOrder[sampleType.values == x],
values[sampleType.values == x],
label=x, alpha=opacity)
sampleMasks.append((abbrDict[x], sampleType.values == x))
palette[abbrDict[x]] = colourDict[x]
for u in uniq:
sc = ax.scatter(runOrder[sampleType.values == u],
values[sampleType.values == u],
marker=markerDict[u],
c=colourDict[u],
alpha=opacity,
label=u)
sampleMasks.append((abbrDict[u], sampleType.values == u))
palette[abbrDict[u]] = colourDict[u]

xmin, xmax = ax.get_xlim()

Expand Down
5 changes: 3 additions & 2 deletions nPYc/plotting/_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,9 +389,10 @@ def checkAndSetPlotAttributes(uniqKeys, attribDict, dictName, defaultVal=None):
# check that every entry in uniqKeys is present as a key in attribDict
# putting this here to see if it's a useful refactor. It may not be.
if attribDict is not None:
#print("dict keys are %s" % attribDict.keys())
#print("Category keys are %s" % uniqKeys)
if not all(k in attribDict.keys() for k in uniqKeys):
print(dictName + " keys are " + attribDict.keys())
print("Category keys are " + uniqKeys)

raise ValueError(
'Check keys in ' + dictName + "; some aren't present in the categories list.")
else:
Expand Down
24 changes: 20 additions & 4 deletions nPYc/reports/multivariateReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,8 +705,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
figuresKWscores = _plotScoresLocal(dataForPlotting,
fields,
pcaModel,
'categorical',
data.name,
'categorical', data.name,
alpha=hotellings_alpha,
plotAssociation=sigKru,
kw_threshold=kw_threshold,
Expand All @@ -716,11 +715,25 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
dpi=data.Attributes['dpi'],
figureSize=data.Attributes['figureSize'])

figuresQCscores = plotScores(pcaModel,
classes=data.sampleMetadata['SampleClass'],
colourType='categorical',
colourDict=data.Attributes['sampleTypeColours'],
markerDict=data.Attributes['sampleTypeMarkers'],
title='SampleClass',
figures=figuresQCscores,
hotelling_alpha=hotellings_alpha,
savePath=saveAs,
figureFormat=data.Attributes['figureFormat'],
dpi=data.Attributes['dpi'],
figureSize=data.Attributes['figureSize'])

if destinationPath is not None:
for key in figuresKWscores:
if os.path.join(destinationPath, 'graphics') in str(figuresKWscores[key]):
figuresKWscores[key] = re.sub('.*graphics', 'graphics', figuresKWscores[key])
item['KWscores'] = figuresKWscores

else:
if destinationPath is None:
print('\n' + item[
Expand Down Expand Up @@ -752,6 +765,7 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
pcaModel,
'categorical',
data.name,

alpha=hotellings_alpha,
plotAssociation=sigNone,
saveDir=saveAs,
Expand Down Expand Up @@ -791,8 +805,10 @@ def multivariateReport(dataTrue, pcaModel, reportType='analytical', withExclusio
return None


def _plotScoresLocal(data, metadata, pcaModel, classType, name, alpha=0.05, plotAssociation=None, r_threshold=None,
kw_threshold=None, saveDir=None, figures=None, figureFormat='png', dpi=72, figureSize=(11, 7)):
def _plotScoresLocal(data, metadata, pcaModel, classType, name,
alpha=0.05, plotAssociation=None, r_threshold=None,
kw_threshold=None, saveDir=None, figures=None,
figureFormat='png', dpi=72, figureSize=(11, 7)):
"""
Local function to plot scores for each metadata field
"""
Expand Down

0 comments on commit 2232da2

Please sign in to comment.