Skip to content

Commit

Permalink
Merge pull request #11 from emanca/dev
Browse files Browse the repository at this point in the history
control plots
  • Loading branch information
emanca authored Mar 7, 2019
2 parents 43af595 + a0c55f9 commit 37e300b
Show file tree
Hide file tree
Showing 47 changed files with 2,039 additions and 40 deletions.
26 changes: 17 additions & 9 deletions framework/RDFtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

class RDFtree:

def __init__(self, inputFiles, outputDir, outputFiles, graphList=[]):
def __init__(self, inputFiles, outputDir, outputFiles, graphList=[], treeName='Events'):

self.inputFiles = inputFiles # list of input files
self.outputFiles = outputFiles # list of output files - one for each path
self.outputDir = outputDir # output directory
self.outputDir = outputDir # output directories
self.graphList = graphList
self.treeName = treeName
self.objList = [] # list of dictionaries containing objects to write
self.rdfOut = []
self.nodesToRestart = []
Expand Down Expand Up @@ -38,8 +39,7 @@ def getOutput(self):

if not os.path.exists(self.outputDir):
os.system("mkdir -p " + self.outputDir)

cwd = os.getcwd()

os.chdir(self.outputDir)

print time.time()-self.start, "before writing objects"
Expand All @@ -51,6 +51,8 @@ def getOutput(self):
fout = ROOT.TFile(outfile+'_{f}'.format(f=self.nIter)+'.root', "recreate")
fout.cd()
for h in hList:


h.Write()

os.chdir('..')
Expand Down Expand Up @@ -87,7 +89,7 @@ def run(self):
# this is the starting RDF to be recreated at the beginning of each path

RDF = ROOT.ROOT.RDataFrame
self.d = RDF("Events", self.inputFiles[j])
self.d = RDF(self.treeName, self.inputFiles[j])

print 'analysing path:', path
print self.nIter, 'iteration number:'
Expand All @@ -99,7 +101,8 @@ def run(self):
subpath = path[self.nodesToRestart[i]:]

#if subpath is empty it doesn't loop
print subpath, 'subpath'

objs[self.outputFiles[i]] = []

for idx, name in enumerate(subpath): # modifies RDF using modules in a sequential way

Expand Down Expand Up @@ -133,9 +136,9 @@ def run(self):
tmp_th2 = run.getTH2()
tmp_th3 = run.getTH3()

objs[self.outputFiles[i]] = []

for obj in tmp_th1:

if isinstance(obj, ROOT.TH1D):

objs[self.outputFiles[i]].append(ROOT.TH1D(obj))
Expand All @@ -158,12 +161,15 @@ def run(self):
objs[self.outputFiles[i]].append(ROOT.RDF.RResultPtr('TH3D')(obj))

self.objList.append(objs)


triggerLoop = run.triggerLoop()
print 'var triggerLoop', triggerLoop

self.rdfOut[i]=self.d

if triggerLoop == True:
print 'am i triggering loop?'
self.nodesToRestart[i]=self.nodesToRestart[i]+idx+1 # restart from that module
print self.nodesToRestart[i], 'index for path', i, 'idx', idx

Expand All @@ -177,18 +183,20 @@ def run(self):
opts.fLazy = True

print time.time()-self.start, "before snapshot"
out = self.d.Snapshot("Events",self.outputFiles[i], "", opts)
out = self.d.Snapshot(self.treeName,self.outputFiles[i], "", opts)

# dummy histogram to trigger snapshot

h = self.d.Histo1D("event")
h = self.d.Define("event", "1").Histo1D("event")
objs[self.outputFiles[i]].append(ROOT.RDF.RResultPtr('TH1D')(h))

print check, 'before increasing'
check = check +1

self.nIter = self.nIter+1
print 'triggered loop!'
self.getOutput() # this triggers loop
print check
if check == len(self.paths): stop = False #there is nothing to do so exit


Expand Down
120 changes: 120 additions & 0 deletions framework/RDFtreeV2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from header import *
import copy

class RDFtree:
    """Drive an analysis organised as named branches of a ROOT RDataFrame.

    Each call to ``branch`` extends an existing node with a list of modules,
    stores the resulting dataframe under a new node name and collects the
    histograms booked by the modules, keyed by output file name.
    ``getOutput`` writes every collected object to its file.
    """

    def __init__(self, outputDir, inputFile, treeName='Events'):
        """Open ``treeName`` from ``inputFile`` and set up the bookkeeping.

        Args:
            outputDir: directory in which the output files are written.
            inputFile: input passed unchanged to the RDataFrame constructor.
            treeName: name of the tree to read (default ``'Events'``).
        """
        self.outputDir = outputDir  # output directory
        self.inputFile = inputFile
        self.treeName = treeName

        RDF = ROOT.ROOT.RDataFrame
        self.d = RDF(self.treeName, self.inputFile)
        self.entries = self.d.Count()  # stores lazily the number of events

        self.modules = []

        self.objs = {}  # output file name -> objects received from modules

        self.node = {}  # dictionary branchName - RDF
        self.node['input'] = self.d  # the input RDF lives on the 'input' node

        self.graph = {}  # start node -> list of end nodes, drawn by saveGraph

        # Handles returned by lazy snapshots.
        # NOTE(review): kept referenced on the assumption that ROOT may drop
        # or warn about a lazy action whose result handle is destroyed before
        # the event loop runs - confirm against the ROOT version in use.
        self._snapshots = []

        # start analysis
        self.start = time.time()

    def branch(self, nodeToStart, nodeToEnd, outputFile, modules=()):
        """Run ``modules`` on top of ``nodeToStart`` and store the result.

        Args:
            nodeToStart: name of an existing node to start from.
            nodeToEnd: name under which the resulting dataframe is stored.
            outputFile: file name under which the booked histograms are
                collected; it also becomes the current ``self.outputFile``.
            modules: iterable of objects providing ``run``, ``getTH1``,
                ``getTH2`` and ``getTH3``.

        Raises:
            KeyError: if ``nodeToStart`` is not a known node. The lookup is
                done first so a failed call leaves no trace in the graph.
        """
        branchRDF = self.node[nodeToStart]

        self.outputFile = outputFile
        # NOTE(review): this resets the list, so objects collected by an
        # earlier branch using the same outputFile are discarded - confirm
        # that this is intended.
        self.objs[self.outputFile] = []

        self.graph.setdefault(nodeToStart, []).append(nodeToEnd)

        new_modules = list(modules)
        self.modules.extend(new_modules)

        collected = self.objs[self.outputFile]

        # modify RDF according to modules
        for m in new_modules:
            branchRDF = m.run(CastToRNode(branchRDF))

            for obj in m.getTH1():
                collected.append(ROOT.RDF.RResultPtr('TH1D')(obj))

            for obj in m.getTH2():
                collected.append(ROOT.RDF.RResultPtr('TH2D')(obj))

            for obj in m.getTH3():
                collected.append(ROOT.RDF.RResultPtr('TH3D')(obj))

        self.node[nodeToEnd] = branchRDF

    def takeSnapshot(self):
        """Book a lazy snapshot of the input dataframe into ``self.outputFile``.

        Must be called after ``branch``, which sets ``self.outputFile``.
        A dummy histogram is added to the objects of that file so that
        writing it in ``getOutput`` runs the event loop.
        """
        opts = ROOT.ROOT.RDF.RSnapshotOptions()
        opts.fLazy = True

        print("%s before snapshot" % (time.time() - self.start))

        # NOTE(review): the snapshot is taken from the input dataframe
        # (self.d), not from a branch node, and getOutput recreates a file
        # with the same name - confirm both are intended.
        out = self.d.Snapshot(self.treeName, self.outputFile, "", opts)
        self._snapshots.append(out)

        # dummy histogram to trigger snapshot
        h = self.d.Define("foo", "1").Histo1D("foo")
        # setdefault: getOutput may have reset self.objs since the last branch
        self.objs.setdefault(self.outputFile, []).append(
            ROOT.RDF.RResultPtr('TH1D')(h))

    def getOutput(self):
        """Write all collected objects, one ROOT file per output name.

        Files are created inside ``self.outputDir`` (created if missing).
        The working directory is changed for the duration of the writing and
        restored afterwards, even if writing fails. On success the object
        list is cleared and a timing summary is printed.
        """
        # now write all the outputs together
        print("writing output files in " + self.outputDir)

        if not os.path.exists(self.outputDir):
            os.makedirs(self.outputDir)

        previous_dir = os.getcwd()
        os.chdir(self.outputDir)
        try:
            for outfile, hList in self.objs.items():
                fout = ROOT.TFile(outfile, "recreate")
                fout.cd()

                for h in hList:
                    h.Write()

                fout.Close()
        finally:
            # go back to where we started, whatever outputDir looks like
            os.chdir(previous_dir)

        self.objs = {}  # re-initialise object list

        print("%s  events processed in  %s  s"
              % (self.entries.GetValue(), time.time() - self.start))

    def saveGraph(self):
        """Print the Graphviz source of the branch graph.

        One edge is drawn from every start node to each of its end nodes.
        Rendering is not performed; only the DOT source is printed.
        """
        from graphviz import Digraph

        dot = Digraph(name='my analysis', filename='graph.pdf')

        for node, nodelist in self.graph.items():
            for n in nodelist:
                dot.edge(node, n)

        print(dot.source)

        # rendering is currently disabled: dot.render(view=True)

19 changes: 18 additions & 1 deletion framework/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,21 @@ class NodeCaster {
def CastToRNode(node):
    """Return ``node`` converted through the ``NodeCaster`` helper.

    ``ROOT.NodeCaster`` is looked up with the node's C++ type name
    (``node.__cppname__``) and its ``Cast`` is applied to the node.
    NOTE(review): presumably this yields ROOT's generic RNode type - confirm
    against the NodeCaster declaration.
    """
    cpp_type_name = node.__cppname__
    caster = ROOT.NodeCaster(cpp_type_name)
    return caster.Cast(node)

# end code for casting
# end code for casting
# Disabled snippet: the bare triple-quoted string below is never executed.
# If enabled, it would create NSlots TRandom3 generators in the ROOT
# interpreter and seed them 1, 2, 3, ...
"""
NSlots = 64
ROOT.gInterpreter.ProcessLine('''
std::vector<TRandom3> myRndGens({NSlots});
int seed = 1; // not 0 because seed 0 has a special meaning
for (auto &&gen : myRndGens) gen.SetSeed(seed++);
'''.format(NSlots = NSlots))
"""

# C++ helper made available to the ROOT interpreter: returns element `idx`
# of a float vector. The vector is taken by value and the index is not
# bounds-checked.
getVector_code ='''
float getVector (std::vector<float> vec, int idx)
{
return vec[idx];
}
'''

# Declare the helper at import time so it exists once this module is loaded.
ROOT.gInterpreter.Declare(getVector_code)
3 changes: 0 additions & 3 deletions framework/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,3 @@ std::vector<ROOT::RDF::RResultPtr<TH2D>> Module::getTH2(){
// Returns, by value, the module's list of TH3D result pointers (_h3List).
std::vector<ROOT::RDF::RResultPtr<TH3D>> Module::getTH3(){
return _h3List;
}
// Returns the value of the _trigLoop flag.
bool Module::triggerLoop(){
return _trigLoop;
}
1 change: 0 additions & 1 deletion framework/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class Module {
virtual std::vector<ROOT::RDF::RResultPtr<TH1D>> getTH1();
virtual std::vector<ROOT::RDF::RResultPtr<TH2D>> getTH2();
virtual std::vector<ROOT::RDF::RResultPtr<TH3D>> getTH3();
virtual bool triggerLoop();

};

Expand Down
48 changes: 48 additions & 0 deletions framework/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Base class from which the other modules will inherit

class module:
    """Base class for analysis modules.

    A module receives a dataframe in ``run`` and exposes the histograms it
    booked through ``getTH1``, ``getTH2`` and ``getTH3``.
    """

    def __init__(self):
        # histograms booked by the module, grouped by dimensionality
        self.myTH1 = []
        self.myTH2 = []
        self.myTH3 = []

    def run(self, d):
        """Process dataframe ``d``; the base implementation does nothing."""
        pass

    def defineSubcollectionFromIndex(self, collection, subcollection, idx, d):
        """Define one ``subcollection`` column per ``collection`` column.

        Every column whose name starts with ``collection`` gets a counterpart
        in which that prefix is replaced by ``subcollection`` and whose value
        is element ``idx`` of the original column. A column
        ``n<subcollection>`` fixed to 1 is defined as well.

        Args:
            collection: name prefix of the columns to read from.
            subcollection: name prefix of the columns to create.
            idx: index expression, inserted verbatim between ``[`` and ``]``.
            d: dataframe providing ``GetColumnNames``,
                ``GetDefinedColumnNames`` and ``Define``.

        Returns:
            The dataframe returned by the last ``Define`` call.
        """
        # Columns of the main collection. The two listings may overlap, and
        # defining the same name twice would be an error, so each name is
        # kept only once, in first-seen order.
        seen = set()
        main = []
        for listing in (d.GetColumnNames(), d.GetDefinedColumnNames()):
            for column in listing:
                column = str(column)
                if column.startswith(collection) and column not in seen:
                    seen.add(column)
                    main.append(column)

        for column in main:
            # swap only the leading prefix; later occurrences of the
            # collection name inside the column name are left untouched
            target = subcollection + column[len(collection):]
            d = d.Define(target, '{vec}[{idx}]'.format(vec=column, idx=idx))

        # define new vector length
        d = d.Define("n{}".format(subcollection), "1")

        return d

    def getTH1(self):
        """Return the list of 1D histograms."""
        return self.myTH1

    def getTH2(self):
        """Return the list of 2D histograms."""
        return self.myTH2

    def getTH3(self):
        """Return the list of 3D histograms."""
        return self.myTH3


Loading

0 comments on commit 37e300b

Please sign in to comment.