Skip to content

Commit

Permalink
Add analyseMVA.py example
Browse files Browse the repository at this point in the history
Also add helpers to plot multiple histograms on a single canvas
  • Loading branch information
makortel committed Jun 16, 2017
1 parent 264d900 commit 46c6f68
Show file tree
Hide file tree
Showing 4 changed files with 309 additions and 40 deletions.
1 change: 1 addition & 0 deletions Validation/RecoTrack/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ Use `--help` to check out the parameters.

* [`trackingNtupleExample.py`](test/trackingNtupleExample.py) examples of various links
* [`analyseDuplicateFake.py`](test/analyseDuplicateFake.py) examples of printouts
* [`analyseMVA.py`](test/analyseMVA.py) simple analysis for debugging track MVA selection
* [`fakeAnalysis/main.py`](test/fakeAnalysis/main.py) complete analysis code for fake tracks


Expand Down
149 changes: 109 additions & 40 deletions Validation/RecoTrack/python/plotting/ntuplePlotting.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import collections
import itertools

import ROOT

import Validation.RecoTrack.plotting.plotting as plotting
Expand All @@ -13,57 +16,29 @@ def applyStyle(h, color, markerStyle):
h.SetLineColor(color)
h.SetLineWidth(2)

def draw(name, histos, styles, legendLabels=[],
xtitle=None, ytitle=None,
drawOpt="HIST",
legendDx=0, legendDy=0, legendDw=0, legendDh=0,
xmin=None, ymin=0, xmax=None, ymax=None, xlog=False, ylog=False,
xgrid=True, ygrid=True,
ratio=False, ratioYmin=0.5, ratioYmax=1.5, ratioYTitle=plotting._ratioYTitle
):
# One style function per (color, marker) pair. The loop variables are
# bound as lambda default arguments so that every function keeps its own
# pair instead of all of them seeing the last one (late-binding closures), see
# https://stackoverflow.com/questions/6076270/python-lambda-function-in-list-comprehensions
_defaultStyles = [lambda h, _color=color, _marker=ms: applyStyle(h, _color, _marker)
                  for color, ms in itertools.izip(plotting._plotStylesColor, plotting._plotStylesMarker)]

# Default factor by which a canvas is made taller when ratio pads are drawn
_ratioFactor = 1.25

def draw(name, histos, styles=_defaultStyles, legendLabels=[], **kwargs):
width = 600
height = 600
ratioFactor = 1.25

args = {}
args.update(kwargs)
if not "ratioFactor" in args:
args["ratioFactor"] = _ratioFactor
ratio = args.get("ratio", False)

if ratio:
height = int(height*ratioFactor)
c = plotting._createCanvas(name, width, height)
if ratio:
plotting._modifyPadForRatio(c, ratioFactor)

bounds = plotting._findBounds(histos, ylog, xmin, xmax, ymin, ymax)
args = {"nrows": 1}
if ratio:
ratioBounds = (bounds[0], ratioYmin, bounds[2], ratioYmax)
frame = plotting.FrameRatio(c, bounds, ratioBounds, ratioFactor, ratioYTitle=ratioYTitle, **args)
#frame._frameRatio.GetYaxis().SetLabelSize(0.12)
else:
frame = plotting.Frame(c, bounds, **args)

if xtitle is not None:
frame.setXTitle(xtitle)
if ytitle is not None:
frame.setYTitle(ytitle)

frame.setLogx(xlog)
frame.setLogy(ylog)
frame.setGridx(xgrid)
frame.setGridy(ygrid)

if ratio:
frame._pad.cd()
for h, st in zip(histos, styles):
st(h)
h.Draw(drawOpt+" same")

ratios = None
if ratio:
frame._padRatio.cd()
ratios = plotting._calculateRatios(histos)
for r in ratios[1:]:
r.draw()
frame._pad.cd()
frame = drawSingle(c, histos, styles, **args)

if len(legendLabels) > 0:
if len(legendLabels) != len(histos):
Expand Down Expand Up @@ -100,3 +75,97 @@ def draw(name, histos, styles, legendLabels=[],
c.RedrawAxis()
c.SaveAs(name+".png")
c.SaveAs(name+".pdf")


def drawSingle(pad, histos, styles=_defaultStyles,
               nrows=1,
               xtitle=None, ytitle=None,
               drawOpt="HIST",
               legendDx=0, legendDy=0, legendDw=0, legendDh=0,
               xmin=None, ymin=0, xmax=None, ymax=None, xlog=False, ylog=False,
               xgrid=True, ygrid=True,
               ratio=False, ratioYmin=0.5, ratioYmax=1.5, ratioYTitle=plotting._ratioYTitle, ratioFactor=1.25):
    """Draw a set of histograms on top of each other in a single pad.

    Arguments:
    pad     -- pad (or canvas) in which the frame is created
    histos  -- list of histograms to draw
    styles  -- list of functions, each applying a style to one histogram;
               reused cyclically if there are more histograms than styles

    Keyword arguments:
    nrows           -- number of pad rows, forwarded to the frame
    xtitle, ytitle  -- axis titles, set only when not None
    drawOpt         -- draw option, " same" is appended to it
    xmin, ymin, xmax, ymax, ylog -- forwarded to plotting._findBounds()
    xlog, ylog, xgrid, ygrid     -- log scale and grid settings of the frame
    ratio           -- if true, create a FrameRatio and draw also the ratios
    ratioYmin, ratioYmax, ratioYTitle, ratioFactor -- ratio pad settings

    The legendDx/legendDy/legendDw/legendDh arguments are accepted but are
    not used by this function.

    Returns the created frame object.
    """
    limits = plotting._findBounds(histos, ylog, xmin, xmax, ymin, ymax)
    if not ratio:
        frame = plotting.Frame(pad, limits, nrows=nrows)
    else:
        # The ratio pad shares the x range of the main pad
        ratioLimits = (limits[0], ratioYmin, limits[2], ratioYmax)
        frame = plotting.FrameRatio(pad, limits, ratioLimits, ratioFactor, ratioYTitle=ratioYTitle, nrows=nrows)

    if xtitle is not None:
        frame.setXTitle(xtitle)
    if ytitle is not None:
        frame.setYTitle(ytitle)

    frame.setLogx(xlog)
    frame.setLogy(ylog)
    frame.setGridx(xgrid)
    frame.setGridy(ygrid)

    # Histograms go to the main pad
    if ratio:
        frame._pad.cd()
    for index, histo in enumerate(histos):
        applyStyleTo = styles[index % len(styles)]
        applyStyleTo(histo)
        histo.Draw(drawOpt+" same")

    if ratio:
        frame._padRatio.cd()
        # NOTE(review): the ratio objects are referenced only by this local
        # variable; whether they stay drawn after returning depends on
        # how plotting._calculateRatios() handles ownership -- confirm
        ratioObjects = plotting._calculateRatios(histos)
        # The first element is skipped
        for ratioObject in ratioObjects[1:]:
            ratioObject.draw()
        frame._pad.cd()

    return frame


def drawMany(name, histoDicts, styles=_defaultStyles, opts={}, ncolumns=4):
    """Draw many histograms on a single canvas, one pad per histogram name.

    Arguments:
    name       -- name of the canvas; the canvas is saved as name.png and name.pdf
    histoDicts -- list of dictionaries (histogram name -> histogram); the
                  histograms with the same name are drawn in the same pad.
                  The keys of the first dictionary define which pads are
                  drawn and in which order.

    Keyword arguments:
    styles   -- list of style functions, forwarded to drawSingle()
    opts     -- dictionary (histogram name -> dictionary of keyword
                arguments forwarded to drawSingle()); only read, never modified
    ncolumns -- number of pad columns in the canvas

    Returns None; nothing is drawn if histoDicts is empty.
    """
    if not histoDicts:
        return

    histoNames = list(histoDicts[0].keys())

    # The canvas is made taller if any pad has the ratio enabled. Test the
    # value and not just the presence of the key, so that an explicit
    # ratio=False does not enable the ratio layout.
    ratio = False
    ratioFactor = _ratioFactor
    for opt in opts.values():
        if opt.get("ratio", False):
            ratio = True
        if "ratioFactor" in opt:
            ratioFactor = max(ratioFactor, opt["ratioFactor"])

    nhistos = len(histoNames)
    nrows = (nhistos+ncolumns-1)//ncolumns # round up

    width = 500*ncolumns
    height = 500*nrows
    if ratio:
        # Use the largest requested factor so that every pad fits
        height = int(ratioFactor*height)

    canvas = plotting._createCanvas(name, width, height)
    canvas.Divide(ncolumns, nrows)

    # Group the histograms by their name
    histos = collections.defaultdict(list)
    for d in histoDicts:
        for n, h in d.items():
            histos[n].append(h)

    for i, histoName in enumerate(histoNames):
        pad = canvas.cd(i+1)

        # Copy the options so that the caller's dictionary is not modified
        args = {}
        args.update(opts.get(histoName, {}))
        if args.get("ratio", False):
            if "ratioFactor" not in args:
                args["ratioFactor"] = _ratioFactor # use the default, not the max
            plotting._modifyPadForRatio(pad, args["ratioFactor"])

        frame = drawSingle(pad, histos[histoName], styles, nrows, **args)
        frame._pad.cd()
        frame._pad.Update()
        frame._pad.RedrawAxis()

    canvas.SaveAs(name+".png")
    canvas.SaveAs(name+".pdf")
3 changes: 3 additions & 0 deletions Validation/RecoTrack/python/plotting/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,9 @@ def __init__(self, pad, bounds, nrows, xbinlabels=None, xbinlabelsize=None, xbin
if nrows == 2:
yoffsetFactor *= 2
xoffsetFactor *= 2
elif nrows >= 5:
yoffsetFactor *= 1.5
xoffsetFactor *= 1.5
elif nrows >= 3:
yoffsetFactor *= 4
xoffsetFactor *= 3
Expand Down
196 changes: 196 additions & 0 deletions Validation/RecoTrack/test/analyseMVA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#!/usr/bin/env python

import array
import collections
import itertools
import math

import ROOT

from Validation.RecoTrack.plotting.ntuple import *
from Validation.RecoTrack.plotting.ntuplePlotting import *

# This file is a sketch of a simple analysis for MVA debugging

def selectMVA(track):
    """Select tracks whose MVA value is strictly between 0.35 and 0.6."""
    return 0.35 < track.mva() < 0.6

def selectTrue(track):
    """Select tracks matched to at least one TrackingParticle."""
    nMatched = track.nMatchedTrackingParticles()
    return nMatched > 0

def selectFake(track):
    """Select tracks not matched to any TrackingParticle."""
    nMatched = track.nMatchedTrackingParticles()
    return nMatched == 0

def main():
    """Compare track properties between the new-MVA and old-MVA ntuples.

    Reads trackingNtuple.root (new) and trackingNtuple_oldMVA.root (old),
    fills the histograms for various track selections, and saves one
    multi-pad canvas per selection with drawMany().
    """
    ROOT.TH1.AddDirectory(False)

    ntuple_new = TrackingNtuple("trackingNtuple.root")
    ntuple_old = TrackingNtuple("trackingNtuple_oldMVA.root")

    # Options common to all pads; add e.g. ratio=True here
    common = dict()
    # Options common to the pads with logarithmic y axis
    common_ylog = dict(ylog=True, ymin=0.5)
    common_ylog.update(common)

    # Per-histogram drawing options, keyed by the histogram name
    opts = {
        "mva": dict(common_ylog, xtitle="MVA"),
        "pt": dict(common_ylog, xtitle="p_{T}", xlog=True),
        "eta": dict(common, xtitle="#eta"),
        "relpterr": dict(common_ylog, xtitle="p_{T} error / p_{T}"),
        "absdxy": dict(common_ylog, xtitle="|d_{xy}(BS)|"),
        "absdz": dict(common_ylog, xtitle="|d_{z}(BS)|"),
        "absdxypv": dict(common_ylog, xtitle="|d_{xy}(closest PV)|"),
        "absdzpv": dict(common_ylog, xtitle="|d_{z}(closest PV)|"),
        "nhits": dict(common, xtitle="hits"),
        "nlayers": dict(common, xtitle="layers"),
        "nlayers3D": dict(common, xtitle="3D layers"),
        "nlayersLost": dict(common, xtitle="lost layers"),
        "minlost": dict(common_ylog, xtitle="min(inner, outer) lost layers"),
        "lostmidfrac": dict(common, xtitle="(lost hits) / (lost + valid hits)"),
        "ndof": dict(common, xtitle="ndof"),
        "chi2": dict(common_ylog, xtitle="chi2/ndof"),
        "chi2_1Dmod": dict(common_ylog, xtitle="chi2/ndof with 1D modification"),
    }

    def selectTrueMVA(t):
        return selectTrue(t) and selectMVA(t)

    def selectFakeMVA(t):
        return selectFake(t) and selectMVA(t)

    # The selection is applied independently to the tracks of each ntuple.
    # Comment out a line to skip that set of plots.
    independent = [
        ("newMVA_vs_oldMVA", None),
        ("newMVA_vs_oldMVA_mvaselected", selectMVA),
        ("newMVA_vs_oldMVA_true", selectTrue),
        ("newMVA_vs_oldMVA_true_mvaselected", selectTrueMVA),
        ("newMVA_vs_oldMVA_fake", selectFake),
        ("newMVA_vs_oldMVA_fake_mvaselected", selectFakeMVA),
    ]
    for plotName, selector in independent:
        histos_new = histos(ntuple_new, selector=selector)
        histos_old = histos(ntuple_old, selector=selector)
        drawMany(plotName, [histos_old, histos_new], opts=opts)

    # The selection is applied on the track of the new ntuple only, and
    # the corresponding track of the old ntuple is used alongside it
    paired = [
        ("newMVA_vs_oldMVA_mvaSelectedNew", selectMVA),
        ("newMVA_vs_oldMVA_true_mvaSelectedNew", selectTrueMVA),
        ("newMVA_vs_oldMVA_fake_mvaSelectedNew", selectFakeMVA),
    ]
    for plotName, selector in paired:
        (histos_old, histos_new) = histos2(ntuple_old, ntuple_new, selector)
        drawMany(plotName, [histos_old, histos_new], opts=opts)


def makeHistos():
    """Book the empty histograms that fillHistos() fills.

    Returns a collections.OrderedDict (histogram name -> ROOT.TH1F) in
    the booking order.
    """
    h = collections.OrderedDict()

    def addTH(name, *args, **kwargs):
        """Book a TH1F called `name` and store it in `h`.

        The positional arguments are forwarded to the TH1F constructor
        after the name and the title (here: number of bins, x minimum,
        x maximum). With xlog=True the bins are made equally wide in
        log10(x), which requires a positive x minimum.
        """
        _h = ROOT.TH1F(name, name, *args)
        if kwargs.get("xlog", False):
            axis = _h.GetXaxis()
            bins = axis.GetNbins()
            minLog10 = math.log10(axis.GetXmin())
            maxLog10 = math.log10(axis.GetXmax())
            width = (maxLog10-minLog10)/bins
            # Compute each edge directly from its exponent instead of
            # multiplying cumulatively, to avoid accumulating rounding errors
            new_bins = array.array("d", [10**(minLog10 + i*width) for i in range(bins+1)])
            axis.Set(bins, new_bins)
        h[name] = _h

    addTH("mva", 80, -1, 1)
    addTH("pt", 40, 0.1, 1000, xlog=True)
    addTH("eta", 60, -3, 3)
    addTH("relpterr", 20, 0, 1)

    addTH("absdxy", 50, 0, 1)
    addTH("absdz", 30, 0, 15)
    addTH("absdxypv", 50, 0., 0.5)
    addTH("absdzpv", 20, 0, 1)

    addTH("nhits", 41, -0.5, 40.5)
    addTH("nlayers", 26, -0.5, 25.5)
    addTH("nlayers3D", 26, -0.5, 25.5)
    addTH("nlayersLost", 6, -0.5, 5.5)
    addTH("minlost", 6, -0.5, 5.5)
    addTH("lostmidfrac", 20, 0, 1)

    addTH("ndof", 20, 0, 20)
    addTH("chi2", 40, 0, 20)
    addTH("chi2_1Dmod", 40, 0, 20)

    return h

def fillHistos(h, track):
    """Fill the histograms with the properties of a single track.

    Arguments:
    h     -- dictionary (histogram name -> histogram) with the names
             booked in makeHistos(); each histogram needs a Fill() method
    track -- track object providing the accessor methods called below

    Each histogram gets one entry per track, except "lostmidfrac", which
    is not filled for a track without any valid or invalid hits because
    the fraction is undefined there.
    """
    h["mva"].Fill(track.mva())
    h["pt"].Fill(track.pt())
    h["eta"].Fill(track.eta())
    h["ndof"].Fill(track.ndof())
    h["nlayers"].Fill(track.nPixelLay()+track.nStripLay())
    h["nlayers3D"].Fill(track.n3DLay())
    h["nlayersLost"].Fill(track.nLostLay())
    h["chi2"].Fill(track.nChi2())
    h["chi2_1Dmod"].Fill(track.nChi2_1Dmod())
    h["relpterr"].Fill(track.ptErr()/track.pt())

    nValid = track.nValid()
    nInvalid = track.nInvalid()
    h["nhits"].Fill(nValid)
    h["minlost"].Fill(min(track.nInnerLost(), track.nOuterLost()))
    nTotal = nValid + nInvalid
    if nTotal > 0:
        # Convert explicitly to float: with integer hit counts a plain "/"
        # is an integer division in Python 2 and would give only 0 or 1
        h["lostmidfrac"].Fill(float(nInvalid) / nTotal)

    h["absdxy"].Fill(abs(track.dxy()))
    h["absdz"].Fill(abs(track.dz()))
    h["absdxypv"].Fill(abs(track.dxyClosestPV()))
    h["absdzpv"].Fill(abs(track.dzClosestPV()))

def histos(ntuple, selector=None):
    """Fill a fresh set of histograms from the tracks of an ntuple.

    Arguments:
    ntuple   -- iterable of events, each providing tracks()
    selector -- optional function (track -> bool); if given, only the
                tracks for which it returns true are filled

    Returns the dictionary of histograms created with makeHistos().
    """
    filled = makeHistos()

    for event in ntuple:
        for track in event.tracks():
            if selector is None or selector(track):
                fillHistos(filled, track)

    return filled

def histos2(ntuple1, ntuple2, selector2=None):
    """Fill histograms from two ntuples iterated in lockstep.

    The two ntuples are assumed to contain the very same tracks, except
    possibly for their parameters. The events and the tracks are paired
    by their position.

    Arguments:
    ntuple1, ntuple2 -- iterables of events
    selector2        -- optional function (track -> bool) evaluated on
                        the track of ntuple2; both tracks of a pair are
                        filled only if it returns true

    Returns a tuple (histograms of ntuple1, histograms of ntuple2).
    Raises Exception if the paired events have different event ids.
    """
    filled1 = makeHistos()
    filled2 = makeHistos()

    for eventPair in itertools.izip(ntuple1, ntuple2):
        ev1, ev2 = eventPair
        if ev1.eventId() != ev2.eventId():
            raise Exception("Inconsistent events %s != %s" % (ev1.eventIdStr(), ev2.eventIdStr()))

        for trk1, trk2 in itertools.izip(ev1.tracks(), ev2.tracks()):
            # The selection decision is made with the track of ntuple2 only
            if selector2 is None or selector2(trk2):
                fillHistos(filled1, trk1)
                fillHistos(filled2, trk2)

    return (filled1, filled2)


# Run the analysis only when this file is executed as a script
if __name__ == "__main__":
    main()

0 comments on commit 46c6f68

Please sign in to comment.