Add analyseMVA.py example

Also add helpers to plot multiple histograms on a single canvas
cms-sw · Jun 16, 2017 · 46c6f68 · 46c6f68
1 parent 264d900
commit 46c6f68
Show file tree

Hide file tree

Showing 4 changed files with 309 additions and 40 deletions.
diff --git a/Validation/RecoTrack/README.md b/Validation/RecoTrack/README.md
@@ -105,6 +105,7 @@ Use `--help` to check out the parameters.
 
 * [`trackingNtupleExample.py`](test/trackingNtupleExample.py) examples of various links
 * [`analyseDuplicateFake.py`](test/analyseDuplicateFake.py) examples of printouts
+* [`analyseMVA.py`](test/analyseMVA.py) simple analysis for debugging track MVA selection
 * [`fakeAnalysis/main.py`](test/fakeAnalysis/main.py) complete analysis code for fake tracks
 
 

diff --git a/Validation/RecoTrack/python/plotting/ntuplePlotting.py b/Validation/RecoTrack/python/plotting/ntuplePlotting.py
@@ -1,3 +1,6 @@
+import collections
+import itertools
+
 import ROOT
 
 import Validation.RecoTrack.plotting.plotting as plotting
@@ -13,57 +16,29 @@ def applyStyle(h, color, markerStyle):
     h.SetLineColor(color)
     h.SetLineWidth(2)
 
-def draw(name, histos, styles, legendLabels=[],
-         xtitle=None, ytitle=None,
-         drawOpt="HIST",
-         legendDx=0, legendDy=0, legendDw=0, legendDh=0,
-         xmin=None, ymin=0, xmax=None, ymax=None, xlog=False, ylog=False,
-         xgrid=True, ygrid=True,
-         ratio=False, ratioYmin=0.5, ratioYmax=1.5, ratioYTitle=plotting._ratioYTitle
-        ):
+# Default per-histogram style callables, one per (color, marker) pair taken
+# from plotting._plotStylesColor and plotting._plotStylesMarker. Each entry
+# takes a histogram and styles it through applyStyle().
+# The outer lambda is called immediately so that every inner lambda captures
+# its own (c, m) values instead of the comprehension's loop variables, see
+# https://stackoverflow.com/questions/6076270/python-lambda-function-in-list-comprehensions
+_defaultStyles = [(lambda c, m: (lambda h: applyStyle(h, c, m)))(color, ms) for color, ms in itertools.izip(plotting._plotStylesColor, plotting._plotStylesMarker)]
+
+# Default "ratioFactor" used by draw() and drawMany() when the caller does
+# not provide one.
+_ratioFactor = 1.25
 
+def draw(name, histos, styles=_defaultStyles, legendLabels=[], **kwargs):
     width = 600
     height = 600
     ratioFactor = 1.25
 
+    args = {}
+    args.update(kwargs)
+    if not "ratioFactor" in args:
+        args["ratioFactor"] = _ratioFactor
+    ratio = args.get("ratio", False)
+
     if ratio:
         height = int(height*ratioFactor)
     c = plotting._createCanvas(name, width, height)
     if ratio:
         plotting._modifyPadForRatio(c, ratioFactor)
 
-    bounds = plotting._findBounds(histos, ylog, xmin, xmax, ymin, ymax)
-    args = {"nrows": 1}
-    if ratio:
-        ratioBounds = (bounds[0], ratioYmin, bounds[2], ratioYmax)
-        frame = plotting.FrameRatio(c, bounds, ratioBounds, ratioFactor, ratioYTitle=ratioYTitle, **args)
-        #frame._frameRatio.GetYaxis().SetLabelSize(0.12)
-    else:
-        frame = plotting.Frame(c, bounds, **args)
-
-    if xtitle is not None:
-        frame.setXTitle(xtitle)
-    if ytitle is not None:
-        frame.setYTitle(ytitle)
-
-    frame.setLogx(xlog)
-    frame.setLogy(ylog)
-    frame.setGridx(xgrid)
-    frame.setGridy(ygrid)
-
-    if ratio:
-        frame._pad.cd()
-    for h, st in zip(histos, styles):
-        st(h)
-        h.Draw(drawOpt+" same")
-
-    ratios = None
-    if ratio:
-        frame._padRatio.cd()
-        ratios = plotting._calculateRatios(histos)
-        for r in ratios[1:]:
-            r.draw()
-        frame._pad.cd()
+    frame = drawSingle(c, histos, styles, **args)
 
     if len(legendLabels) > 0:
         if len(legendLabels) != len(histos):
@@ -100,3 +75,97 @@ def draw(name, histos, styles, legendLabels=[],
     c.RedrawAxis()
     c.SaveAs(name+".png")
     c.SaveAs(name+".pdf")
+
+
+def drawSingle(pad, histos, styles=_defaultStyles,
+               nrows=1,
+               xtitle=None, ytitle=None,
+               drawOpt="HIST",
+               legendDx=0, legendDy=0, legendDw=0, legendDh=0,
+               xmin=None, ymin=0, xmax=None, ymax=None, xlog=False, ylog=False,
+               xgrid=True, ygrid=True,
+               ratio=False, ratioYmin=0.5, ratioYmax=1.5, ratioYTitle=plotting._ratioYTitle, ratioFactor=1.25):
+    """Draw a list of histograms on one pad and return the frame object.
+
+    Arguments:
+    pad     -- pad (or canvas) handed to plotting.Frame / plotting.FrameRatio
+    histos  -- histograms to draw, in drawing order
+    styles  -- style callables; histogram i gets styles[i % len(styles)]
+    nrows   -- forwarded to the frame constructor
+    xtitle, ytitle -- axis titles, set only when not None
+    drawOpt -- draw option; " same" is appended for every histogram
+    xmin, ymin, xmax, ymax, ylog -- forwarded to plotting._findBounds()
+    xlog, ylog, xgrid, ygrid -- log-scale and grid switches of the frame
+    ratio   -- if true, use a FrameRatio and draw ratios in its ratio pad
+    ratioYmin, ratioYmax, ratioYTitle, ratioFactor -- ratio pad settings
+
+    The legendDx/legendDy/legendDw/legendDh arguments are accepted but not
+    used by this function.
+    """
+    frameBounds = plotting._findBounds(histos, ylog, xmin, xmax, ymin, ymax)
+    if not ratio:
+        frame = plotting.Frame(pad, frameBounds, nrows=nrows)
+    else:
+        # The ratio pad shares the x range of the main pad
+        boundsForRatio = (frameBounds[0], ratioYmin, frameBounds[2], ratioYmax)
+        frame = plotting.FrameRatio(pad, frameBounds, boundsForRatio, ratioFactor, ratioYTitle=ratioYTitle, nrows=nrows)
+
+    if xtitle is not None:
+        frame.setXTitle(xtitle)
+    if ytitle is not None:
+        frame.setYTitle(ytitle)
+
+    frame.setLogx(xlog)
+    frame.setLogy(ylog)
+    frame.setGridx(xgrid)
+    frame.setGridy(ygrid)
+
+    if ratio:
+        frame._pad.cd()
+
+    # Styles are reused cyclically if there are more histograms than styles
+    nstyles = len(styles)
+    sameOpt = drawOpt+" same"
+    for index, histo in enumerate(histos):
+        styles[index%nstyles](histo)
+        histo.Draw(sameOpt)
+
+    if ratio:
+        frame._padRatio.cd()
+        # The first element is skipped, only the remaining ratios are drawn
+        for ratioObj in plotting._calculateRatios(histos)[1:]:
+            ratioObj.draw()
+        frame._pad.cd()
+
+    return frame
+
+
+def drawMany(name, histoDicts, styles=_defaultStyles, opts={}, ncolumns=4):
+    """Draw several groups of histograms on one canvas, one pad per histogram name.
+
+    Arguments:
+    name       -- canvas name; also the base name of the saved .png and .pdf files
+    histoDicts -- sequence of dictionaries mapping histogram name to histogram;
+                  the names (and their order) are taken from the first dictionary
+    styles     -- style callables forwarded to drawSingle()
+    opts       -- dictionary mapping histogram name to a dictionary of keyword
+                  arguments forwarded to drawSingle() for that pad
+    ncolumns   -- number of pad columns on the canvas
+
+    Returns None. Nothing is drawn if histoDicts is empty.
+    """
+    if len(histoDicts) == 0:
+        return
+
+    histoNames = histoDicts[0].keys()
+
+    # The canvas height is stretched if any pad actually asks for a ratio,
+    # using the largest ratioFactor requested by any pad.
+    ratio = False
+    ratioFactor = _ratioFactor
+    for opt in opts.values():
+        if opt.get("ratio", False):
+            ratio = True
+        if "ratioFactor" in opt:
+            ratioFactor = max(ratioFactor, opt["ratioFactor"])
+
+    nhistos = len(histoNames)
+    # Number of rows needed to fit all pads, i.e. ceil(nhistos/ncolumns)
+    nrows = (nhistos+ncolumns-1)//ncolumns
+
+    width = 500*ncolumns
+    height = 500*nrows
+    if ratio:
+        # Use the maximum computed above (it was previously computed but
+        # the default _ratioFactor was applied here instead)
+        height = int(ratioFactor*height)
+
+    canvas = plotting._createCanvas(name, width, height)
+    canvas.Divide(ncolumns, nrows)
+
+    # Group the histograms of all input dictionaries by name
+    histos = collections.defaultdict(list)
+
+    for d in histoDicts:
+        for n, h in d.items():
+            histos[n].append(h)
+
+    for i, histoName in enumerate(histoNames):
+        pad = canvas.cd(i+1)
+
+        # Work on a copy so that the caller's option dictionaries stay untouched
+        args = dict(opts.get(histoName, {}))
+        # Only reshape the pad when the ratio is really enabled; an explicit
+        # ratio=False must leave the pad as it is because drawSingle() then
+        # creates a plain frame.
+        if args.get("ratio", False):
+            if not "ratioFactor" in args:
+                args["ratioFactor"] = _ratioFactor # use the default, not the max
+            plotting._modifyPadForRatio(pad, args["ratioFactor"])
+
+        frame = drawSingle(pad, histos[histoName], styles, nrows, **args)
+        frame._pad.cd()
+        frame._pad.Update()
+        frame._pad.RedrawAxis()
+
+    canvas.SaveAs(name+".png")
+    canvas.SaveAs(name+".pdf")
diff --git a/Validation/RecoTrack/python/plotting/plotting.py b/Validation/RecoTrack/python/plotting/plotting.py
@@ -1062,6 +1062,9 @@ def __init__(self, pad, bounds, nrows, xbinlabels=None, xbinlabelsize=None, xbin
         if nrows == 2:
             yoffsetFactor *= 2
             xoffsetFactor *= 2
+        elif nrows >= 5:
+            yoffsetFactor *= 1.5
+            xoffsetFactor *= 1.5
         elif nrows >= 3:
             yoffsetFactor *= 4
             xoffsetFactor *= 3

diff --git a/Validation/RecoTrack/test/analyseMVA.py b/Validation/RecoTrack/test/analyseMVA.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python
+
+import array
+import collections
+import itertools
+
+import ROOT
+
+from Validation.RecoTrack.plotting.ntuple import *
+from Validation.RecoTrack.plotting.ntuplePlotting import *
+
+# This file is a sketch of a simple analysis for MVA debugging
+
+def selectMVA(track):
+    """Return True if the track MVA value lies strictly between 0.35 and 0.6."""
+    return 0.35 < track.mva() < 0.6
+
+def selectTrue(track):
+    """Return True if the track has at least one matched TrackingParticle."""
+    nmatched = track.nMatchedTrackingParticles()
+    return nmatched > 0
+
+def selectFake(track):
+    """Return True if the track has no matched TrackingParticles."""
+    nmatched = track.nMatchedTrackingParticles()
+    return nmatched == 0
+
+def main():
+    """Compare track quantities between two ntuples and save the comparison plots.
+
+    Reads "trackingNtuple.root" (new MVA) and "trackingNtuple_oldMVA.root"
+    (old MVA), fills the same set of histograms from both with various track
+    selections, and draws each old-vs-new comparison with drawMany().
+    """
+    # Keep the booked histograms out of ROOT's automatic directory bookkeeping
+    ROOT.TH1.AddDirectory(False)
+
+    ntuple_new = TrackingNtuple("trackingNtuple.root")
+    ntuple_old = TrackingNtuple("trackingNtuple_oldMVA.root")
+
+    # Drawing options shared by all pads
+    common = dict(
+        #ratio=True,
+    )
+    # Shared options for the pads drawn with a logarithmic y axis
+    common_ylog = dict(
+        ylog=True,
+        ymin=0.5,
+    )
+    common_ylog.update(common)
+
+    # Per-histogram drawing options; the keys are the histogram names booked
+    # in makeHistos()
+    opts = dict(
+        mva = dict(xtitle="MVA", **common_ylog),
+        pt  = dict(xtitle="p_{T}", xlog=True, **common_ylog),
+        eta = dict(xtitle="#eta", **common),
+        relpterr = dict(xtitle="p_{T} error / p_{T}", **common_ylog),
+        absdxy = dict(xtitle="|d_{xy}(BS)|", **common_ylog),
+        absdz = dict(xtitle="|d_{z}(BS)|", **common_ylog),
+        absdxypv = dict(xtitle="|d_{xy}(closest PV)|", **common_ylog),
+        absdzpv = dict(xtitle="|d_{z}(closest PV)|", **common_ylog),
+        nhits = dict(xtitle="hits", **common),
+        nlayers = dict(xtitle="layers", **common),
+        nlayers3D = dict(xtitle="3D layers", **common),
+        nlayersLost = dict(xtitle="lost layers", **common),
+        minlost = dict(xtitle="min(inner, outer) lost layers", **common_ylog),
+        lostmidfrac = dict(xtitle="(lost hits) / (lost + valid hits)", **common),
+        ndof = dict(xtitle="ndof", **common),
+        chi2 = dict(xtitle="chi2/ndof", **common_ylog),
+        chi2_1Dmod = dict(xtitle="chi2/ndof with 1D modification", **common_ylog),
+    )
+
+    # NOTE(review): the "if True:" guards below presumably exist so that
+    # individual comparisons can be switched off by hand -- confirm.
+
+    # All tracks
+    if True:
+        histos_new = histos(ntuple_new)
+        histos_old = histos(ntuple_old)
+        drawMany("newMVA_vs_oldMVA", [histos_old, histos_new], opts=opts)
+
+    # Tracks passing the MVA window, selected independently in each ntuple
+    if True:
+        histos_new = histos(ntuple_new, selector=selectMVA)
+        histos_old = histos(ntuple_old, selector=selectMVA)
+        drawMany("newMVA_vs_oldMVA_mvaselected", [histos_old, histos_new], opts=opts)
+
+    # True tracks
+    if True:
+        histos_new = histos(ntuple_new, selector=selectTrue)
+        histos_old = histos(ntuple_old, selector=selectTrue)
+        drawMany("newMVA_vs_oldMVA_true", [histos_old, histos_new], opts=opts)
+
+    # True tracks passing the MVA window
+    if True:
+        histos_new = histos(ntuple_new, selector=lambda t: selectTrue(t) and selectMVA(t))
+        histos_old = histos(ntuple_old, selector=lambda t: selectTrue(t) and selectMVA(t))
+        drawMany("newMVA_vs_oldMVA_true_mvaselected", [histos_old, histos_new], opts=opts)
+
+    # Fake tracks
+    if True:
+        histos_new = histos(ntuple_new, selector=selectFake)
+        histos_old = histos(ntuple_old, selector=selectFake)
+        drawMany("newMVA_vs_oldMVA_fake", [histos_old, histos_new], opts=opts)
+
+    # Fake tracks passing the MVA window
+    if True:
+        histos_new = histos(ntuple_new, selector=lambda t: selectFake(t) and selectMVA(t))
+        histos_old = histos(ntuple_old, selector=lambda t: selectFake(t) and selectMVA(t))
+        drawMany("newMVA_vs_oldMVA_fake_mvaselected", [histos_old, histos_new], opts=opts)
+
+    # From here on the selection is evaluated on the track of the new ntuple
+    # only, and the paired track of the old ntuple is filled alongside it
+    # (see histos2())
+    if True:
+        (histos_old, histos_new) = histos2(ntuple_old, ntuple_new, selectMVA)
+        drawMany("newMVA_vs_oldMVA_mvaSelectedNew", [histos_old, histos_new], opts=opts)
+
+    if True:
+        (histos_old, histos_new) = histos2(ntuple_old, ntuple_new, lambda t: selectTrue(t) and selectMVA(t))
+        drawMany("newMVA_vs_oldMVA_true_mvaSelectedNew", [histos_old, histos_new], opts=opts)
+
+    if True:
+        (histos_old, histos_new) = histos2(ntuple_old, ntuple_new, lambda t: selectFake(t) and selectMVA(t))
+        drawMany("newMVA_vs_oldMVA_fake_mvaSelectedNew", [histos_old, histos_new], opts=opts)
+
+
+def makeHistos():
+    """Book the set of TH1F histograms used by this analysis.
+
+    Returns a collections.OrderedDict mapping histogram name to histogram,
+    in booking order.
+    """
+    # Imported here because this file has no top-level "import math"; the
+    # name would otherwise be available only if one of the wildcard imports
+    # happens to export it.
+    import math
+
+    h = collections.OrderedDict()
+    def addTH(name, *args, **kwargs):
+        """Book TH1F(name, name, *args); with xlog=True use log-spaced x bins."""
+        _h = ROOT.TH1F(name, name, *args)
+        if kwargs.get("xlog", False):
+            # Replace the uniform binning with bins of equal width in
+            # log10(x) over the same range
+            axis = _h.GetXaxis()
+            bins = axis.GetNbins()
+            minLog10 = math.log10(axis.GetXmin())
+            maxLog10 = math.log10(axis.GetXmax())
+            width = (maxLog10-minLog10)/bins
+            new_bins = array.array("d", [0]*(bins+1))
+            new_bins[0] = 10**minLog10
+            # Each bin edge is the previous one times a constant factor
+            mult = 10**width
+            for i in xrange(1, bins+1):
+                new_bins[i] = new_bins[i-1]*mult
+            axis.Set(bins, new_bins)
+        h[name] = _h
+
+    addTH("mva", 80, -1, 1)
+    addTH("pt", 40, 0.1, 1000, xlog=True)
+    addTH("eta", 60, -3, 3)
+    addTH("relpterr", 20, 0, 1)
+
+    addTH("absdxy", 50, 0, 1)
+    addTH("absdz", 30, 0, 15)
+    addTH("absdxypv", 50, 0., 0.5)
+    addTH("absdzpv", 20, 0, 1)
+
+    addTH("nhits", 41, -0.5, 40.5)
+    addTH("nlayers", 26, -0.5, 25.5)
+    addTH("nlayers3D", 26, -0.5, 25.5)
+    addTH("nlayersLost", 6, -0.5, 5.5)
+    addTH("minlost", 6, -0.5, 5.5)
+    addTH("lostmidfrac", 20, 0, 1)
+
+    addTH("ndof", 20, 0, 20)
+    addTH("chi2", 40, 0, 20)
+    addTH("chi2_1Dmod", 40, 0, 20)
+
+    return h
+
+def fillHistos(h, track):
+    """Fill the histograms of one track.
+
+    Arguments:
+    h     -- dictionary of histograms keyed by name, as returned by makeHistos()
+    track -- track object providing the accessor methods called below
+    """
+    h["mva"].Fill(track.mva())
+    h["pt"].Fill(track.pt())
+    h["eta"].Fill(track.eta())
+    h["ndof"].Fill(track.ndof())
+    h["nlayers"].Fill(track.nPixelLay()+track.nStripLay())
+    h["nlayers3D"].Fill(track.n3DLay())
+    h["nlayersLost"].Fill(track.nLostLay())
+    h["chi2"].Fill(track.nChi2())
+    h["chi2_1Dmod"].Fill(track.nChi2_1Dmod())
+    h["relpterr"].Fill(track.ptErr()/track.pt())
+    h["nhits"].Fill(track.nValid())
+    h["minlost"].Fill(min(track.nInnerLost(), track.nOuterLost()))
+    # Force floating-point division: with integer hit counts a plain "/"
+    # truncates under Python 2 and the fraction would always be 0 or 1
+    h["lostmidfrac"].Fill(float(track.nInvalid()) / (track.nValid() + track.nInvalid()))
+
+    h["absdxy"].Fill(abs(track.dxy()))
+    h["absdz"].Fill(abs(track.dz()))
+    h["absdxypv"].Fill(abs(track.dxyClosestPV()))
+    h["absdzpv"].Fill(abs(track.dzClosestPV()))
+
+def histos(ntuple, selector=None):
+    """Book a fresh set of histograms and fill it from all tracks of an ntuple.
+
+    Arguments:
+    ntuple   -- iterable of events, each providing tracks()
+    selector -- optional callable taking a track; when given, only tracks
+                for which it returns a true value are filled
+
+    Returns the dictionary of filled histograms.
+    """
+    booked = makeHistos()
+
+    for event in ntuple:
+        for trk in event.tracks():
+            if selector is None or selector(trk):
+                fillHistos(booked, trk)
+
+    return booked
+
+def histos2(ntuple1, ntuple2, selector2=None):
+    """Fill two sets of histograms from two ntuples iterated in lockstep.
+
+    The two ntuples are assumed to contain the very same tracks, except
+    possibly for their parameters. Events and tracks are paired by position.
+    The optional selector2 is evaluated on the track of ntuple2 only; when
+    it passes (or is not given), both tracks of the pair are filled.
+
+    Raises Exception if the paired events have different event ids.
+
+    Returns the tuple (histograms of ntuple1, histograms of ntuple2).
+    """
+    filled1 = makeHistos()
+    filled2 = makeHistos()
+
+    for (ev1, ev2) in itertools.izip(ntuple1, ntuple2):
+        if ev1.eventId() != ev2.eventId():
+            raise Exception("Inconsistent events %s != %s" % (ev1.eventIdStr(), ev2.eventIdStr()))
+
+        for (trk1, trk2) in itertools.izip(ev1.tracks(), ev2.tracks()):
+            if selector2 is None or selector2(trk2):
+                fillHistos(filled1, trk1)
+                fillHistos(filled2, trk2)
+
+    return (filled1, filled2)
+
+
+# Run the analysis only when this file is executed as a script
+if __name__ == "__main__":
+    main()