% Documentation.tex

\documentclass[12pt, a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[english, ngerman]{babel}
\usepackage{amsmath}
\usepackage[utf8]{inputenc}
\usepackage{amsfonts}
\usepackage{graphicx}
\usepackage{grffile}
%\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{multirow}
\usepackage{icomma}
\usepackage{siunitx}

\setlength{\oddsidemargin}{0.5cm}
\setlength{\evensidemargin}{0.5cm}
\setlength{\topmargin}{0cm}
\setlength{\topskip}{0cm}
\setlength{\footskip}{1cm}
\setlength{\textwidth}{15cm}
\setlength{\textheight}{23cm}
\selectlanguage{english}
\title{{\Huge \textbf{pyroot-plotscripts framework}} \\[3cm]Documentation\\[1cm] }

\newcommand{\args}{$^*$args}
\newcommand{\kwargs}{$^{**}$kwargs}
\begin{document}
\selectlanguage{english}
\maketitle
\newpage
\tableofcontents
\newpage

\section{Plottingscripts}

\subsection{\texttt{LimitsAll\_v20.py}}
The following preprocessing steps are executed:
		\begin{itemize}
			\item \textbf{initialization of analysisConfig:}\\
				\texttt{analysis = analysisClass.analysisConfig}\\
				\texttt{analysis.initArguments}\\
				\texttt{analysis.initAnalysisOptions}\\
				\texttt{analysis.initPlotConfig}
				
			\item \textbf{preparation of configData:}\\
				\texttt{configData = configClass.configData}\\
				\texttt{configData.initData}\\
				\texttt{configData.genDiscriminatorPlots}
				
			\item \textbf{definition of additional variables:}\\
				\texttt{configData.getAddVariables}\\
				\texttt{configData.getMEPDFAddVariables}\\
				\texttt{configData.getAdditionalDiscrimiatorPlots}
				
			\item \textbf{initialization of samples:}\\
				\texttt{configData.initSamples}
				
		\end{itemize}

~\\
If no \texttt{plotNumber} is chosen (i.e. the script is executed normally), the following steps can be enabled independently:
		\begin{itemize}
			\item \textbf{plotParallel:}\\
				\texttt{pP = plotParallel.plotParallel}\\
				\texttt{pP.setJson}\\
				\texttt{pP.setDataBases}\\
				\texttt{pP.setAddInterfaces}\\
				\texttt{pP.setCatNames}\\
				\texttt{pP.setCatSelections}\\
				\texttt{pP.setMaxEvts}\\
				\texttt{pP.run}\\
				\texttt{pP.checkHaddFiles}\\
				\texttt{pP.checkTermination}
				
			\item \textbf{optimizedRebinning:}\\
				\texttt{optBinning.optimizeBinning}
				
			\item \textbf{renameHistograms:}\\
				\texttt{pP.setRenameInput}\\
				\texttt{renameHistos.renameHistos}\\
				\texttt{pP.addData}
				
			\item \textbf{makeDatacards:}\\
				\texttt{makeDatacards.makeDatacardsParallel}
				
			\item \textbf{drawParallel:}\\
				\texttt{drawParallel.drawParallel}
		\end{itemize}
		
~\\		
If a \texttt{plotNumber} is chosen (i.e. the script is executed via \texttt{drawParallel} or is a single execute script), the following steps can be enabled independently:
		\begin{itemize}
			\item \textbf{drawParallel:}\\
				\texttt{configData.getDiscriminatorPlotByNumber}
				
			\item If any of the following options is activated, the first step is creating the needed lists:\\
				\texttt{gP = genPlots.genPlots}\\
				\texttt{gP.genList}
				
			\item \textbf{makeSimplePlots:}\\
				\texttt{gP.makeSimpleControlPlots}\\
				\texttt{gP.makeSimpleShapePlots}
				
			\item \textbf{makeMCControlPlots:}\\
				\texttt{gP.genNestedHistList}\\
				\texttt{gP.makeControlPlots}
				
			\item \textbf{makeEventYields:}\\
				\texttt{gP.makeEventYields}
		\end{itemize}
		

\newpage
\section{Util}


\subsection{class \texttt{analysisConfig.analysisConfig}}
The \texttt{analysisConfig} class is used to store most of the settings for the plotting processes. Some options are used for enabling certain steps of the script, some options are used for skipping certain steps under certain circumstances and other options are used to store paths or variables, which are used throughout the script.


\subsubsection{\textit{init}}
\texttt{analysisConfig(\args, signalProcess = "ttbb", discrName = "finaldiscr")}\\
\begin{tabular}{r|l}
\hline
\textbf{workdir} 		& absolute path to working directory\\ 
						& determines the place where all output is stored. \\

\textbf{pyrootdir} 		& absolute path to \texttt{pyroot-plotscripts} directory. \\

\textbf{rootPath} 		& absolute path to desired output root file. \\ 
						& determines in- and outputs during different steps of the script \\
						& a reasonable choice is \texttt{<workdir>/output\_limitInput.root}. \\

\textbf{signalProcess} 	& name of the chosen signal process (default \texttt{ttbb}) \\
						& determines plot configuration and position of ttH-samples \\ 
						& viable choices are \texttt{ttbb}, \texttt{ttH} or \texttt{DM}.\\

\textbf{discrName} 		& name of the discriminator (default \texttt{finaldiscr}) \\ 
						& used to determine the discriminator plots.\\
\hline
\end{tabular}

\subsubsection{\textit{initArguments}}
\texttt{analysisConfig.initArguments(argv = list())}\\
\begin{tabular}{r|l}
\hline
\textbf{argv}	& list of arguments, usually uses \texttt{sys.argv} as input. \\
\hline
\end{tabular}

\subsubsection{\textit{initAnalysisOptions}}
\texttt{analysisConfig.initAnalysisOptions(analysisOptions = \{\})}\\
\begin{tabular}{r|l}
\hline
\textbf{analysisOptions}	& dictionary of options, mostly booleans.\\
\hline
\end{tabular}
\\
The possible options and their explanations are summarized in table~\ref{analysisOptions}.


\begin{table}[h]
\caption{\textbf{analysis options.}}\label{analysisOptions}
\begin{tabular}{r|ll}
option							& default & \\
\hline
\texttt{plotParallel} 			& True	& activate \texttt{plotParallel} step\\
\texttt{drawParallel}			& True	& activate \texttt{drawParallel} step\\
\texttt{makeEventYields}		& True	& activate \texttt{makeEventYields} step\\
\texttt{makeDataCards}			& True	& activate \texttt{makeDataCards} step\\
\texttt{makeSimplePlots}		& True	& activate \texttt{makeSimplePlots} step\\
\texttt{makeMCControlPlots}		& True	& activate \texttt{makeMCControlPlots} step\\
\texttt{optimizedRebinning}		& ""	& activate \texttt{optimizedRebinning} step\\
\hline
\texttt{additionalPlotVariables}& []	& add more variables\\
\texttt{plotNumber}				& None	& set \texttt{plotNumber} variable for 														\texttt{drawParallel}\\
\texttt{singleExecute}			& False	& execute drawing steps without																\texttt{drawParallel}\\
\texttt{cirun}					& False	& fast test run with fewer events\\
\texttt{plotBlinded}			& False	& perform plotting steps blinded\\
\texttt{useOldRoot}				& False	& use the existing root file in workdir\\
\texttt{stopAfterCompile}		& False	& stop script after compiling the cpp program\\
\hline
\texttt{skipPlotParallel}		& False	& try to skip \texttt{plotParallel} NAF submission\\
\texttt{skipHaddParallel}		& False	& try to skip \texttt{haddParallel} NAF submission\\
\texttt{skipHaddFromWildcard}	& False	& try to skip \texttt{haddFromWildcard} NAF submission\\
\texttt{skipRenaming}			& False	& try to skip \texttt{parallelRenaming} NAF submission\\
\texttt{skipDatacards}			& False	& try to skip \texttt{makeDatacards} NAF submission\\

\end{tabular}
\end{table}


\subsubsection{\textit{initPlotConfig}}
\texttt{analysisConfig.initPlotConfig()}\\
Takes the plotconfig determined by the signal process and imports the corresponding config from the \texttt{<pyrootdir>/configs/} directory as an internal module of \texttt{analysisConfig}.

\subsubsection{\textit{setLimitPath}}
\texttt{analysisConfig.setLimitPath(\kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{name}	&	part of the name for the ROOT file used as input for limit calculations.\\
				&	is determined via \texttt{plotParallel.ppRootPath\_<name>.root}.\\
				&	(default \texttt{limitInput}).\\
\hline
\end{tabular}
This function is used before the \texttt{renameHistos} step. The path is set and it is checked whether it already exists. If it does, the \texttt{renameHistos} step is skipped, as the output is already present.


\subsubsection{\textit{getPlotPath}}
\texttt{analysisClass.getPlotPath()}\\
Returns path to \texttt{<workdir>/outputPlots} if \texttt{analysisConfig.plotNumber} is specified. Otherwise returns nothing.


\subsection{class \texttt{configClass.configData}}
The \texttt{configData} class is used to initialize the data needed for the main steps of the script. It handles the initialization of discriminator plots and samples.

\subsubsection{\textit{init}}
\texttt{ configData(\args, configDataBaseName = "")}\\
\begin{tabular}{r|l}
\hline
\texttt{analysisClass} 		& fully initialized instance of \texttt{analysisClass} \\
\texttt{configDataBaseName}	& name of config files in \texttt{<pyrootdir>/configs/} directory \\
							& for example \texttt{controlPlotsv13}.\\
\hline
\end{tabular}

\subsubsection{\textit{initData}}
\label{catData}
\texttt{configData.initData()}\\
Initializes an instance of \texttt{catData} class, which is used to store plots.
It consists of the following lists:
\begin{small}
\begin{itemize}
\itemsep-0.7em
\item \texttt{discrs}
\item \texttt{nhistobins}
\item \texttt{minxvals}
\item \texttt{maxxvals}
\item \texttt{categories}
\item \texttt{plotPreselections}
\item \texttt{binlabels}
\end{itemize}
\end{small}


\subsubsection{\textit{genDiscriminatorPlots}}
\texttt{configData.genDiscriminatorPlots(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{memexp} 	& string type expression for MEM-variables.\\
\hline
\end{tabular}
\\
Imports \texttt{<pyrootdir>/configs/<basename>\_plots} as config file for generating the discriminator plots and calls its functions \texttt{getDiscriminatorPlots} and \texttt{evtYieldCategories}, thereby creating the lists \texttt{discriminatorPlots} and \texttt{evtYieldCategories}.\\
For the documentation of \texttt{<pyrootdir>/configs/<basename>\_plots} files see \ref{configs_plots}.

\subsubsection{\textit{writeConfigDataToWorkdir}}
\texttt{configData.writeConfigDataToWorkdir()}\\
Creates csv file \texttt{<workdir>/configData.csv} and writes content of \texttt{catData} class to it.


\subsubsection{\textit{getAddVariables}}
\texttt{configData.getAddVariables()}\\
Imports \texttt{<pyrootdir>/configs/<basename>\_addVariables} as config file for generating the discriminator plots and calls its function \texttt{getAddVars}, thereby creating the list \texttt{addVars}, containing additional variables which will be considered when writing the cpp file.\\
For the documentation of \texttt{<pyrootdir>/configs/<basename>\_addVariables} files see \ref{configs_addvars}.


\subsubsection{\textit{getMEPDFAddVariables}}
\texttt{configData.getMEPDFAddVariables(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{csvfile}	& path to csv-type file\\
					& containing names, weights and factors for matrix element pdf variables.\\
\hline
\end{tabular}
\\
Adds more variables to \texttt{addVars} list with \texttt{GetMEPDFadditionalVariablesList} function from \texttt{PDFutils} (see \ref{pdfutils}).


\subsubsection{\textit{getAdditionalDiscrimiatorPlots}}
\texttt{configData.getAdditionalDiscrimiatorPlots(alwaysExecute = False)}\\
\begin{tabular}{r|l}
\hline
\texttt{alwaysExecute}	&	forces the script to continue, even if no \\														&	\texttt{additionalPlotVariablesMap} was found.\\
\hline
\end{tabular}
\\
Is only executed, if \texttt{analysis.additionalPlotVariables} is set to True.\\
Creates a list of plots for additional input variables. The function checks if the file \texttt{additionalPlotVariablesMap.py} already exists.\\
If yes, it tries to determine the number of bins and plot range from that file. \\
If no, it will construct a dictionary and save it to the file and exit.\\
When additional plots are found, it extends the previously created \texttt{discriminatorPlots} list.


\subsubsection{\textit{initSamples}}
\texttt{configData.initSamples()}\\
Imports \texttt{<pyrootdir>/configs/<basename>\_samples} as config file for generating the needed samples and calls the functions listed in table~\ref{sample lists}.

\begin{table}
\caption{\textbf{sample lists.}} \label{sample lists}
\begin{tabular}{r|l}
\texttt{<basename>\_samples} function & content  \\
\hline
\texttt{getSamples()} 			& 	samples from \texttt{pltcfg}	\\
\texttt{getControlSamples()} 	& 	control samples from \texttt{pltcfg}\\
\texttt{getSystSamples()} 		&	systematic samples from \texttt{pltcfg}\\
\texttt{getAllSamples()} 		&	list of samples written to cpp program\\
\hline
\texttt{getAllSystNames()} 		& 	names of samples, e.g. for \texttt{renameHistos} step\\
\texttt{getWeightSystNames()}	&	names of systematic weights, e.g. for \texttt{plotParallel}\\
\texttt{getOtherSystNames()} 	&	other systematic names, e.g. for \texttt{plotParallel}\\
\texttt{getSystWeights()} 		&	list of systematic weights, e.g. for \texttt{plotParallel}\\
\end{tabular}
\end{table}

~\\
For the documentation of \texttt{<pyrootdir>/configs/<basename>\_samples} files see~\ref{configs_samples}.


\subsubsection{\textit{getDiscriminatorPlotByNumber}}
\texttt{configData.getDiscriminatorPlotByNumber()}\\
Sets the variable \texttt{plotParallel.discriminatorPlotByNumber} depending on \texttt{analysisClass.plotNumber}.\\
This should be used before calling the \texttt{drawParallel} class init, such that the plots for the appropriate discriminator plots can be created.


\subsubsection{\textit{getDiscriminatorPlots}}
\texttt{configData.getDiscriminatorPlots()}\\
Returns a single discriminator plot as list when \texttt{analysisClass.plotNumber} is specified.\\
Returns all discriminator plots from \texttt{configData.discriminatorPlots} when no plot number is specified.


\subsection{class \texttt{plotParallel.plotParallel}}


\subsubsection{\textit{init}}
\texttt{plotParallel.plotParallel(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{analysis}		& \texttt{analysisClass} class instance. \\
\texttt{configData}		& \texttt{configData} class instance.\\
\hline
\end{tabular}\\
Saves \texttt{analysisClass} and \texttt{configData} as members, sets \texttt{analysisClass.ppRootPath} default to \texttt{output.root}. Also initializes default values for the options, which can be adjusted via setter functions.


\subsubsection{\textit{setJson}}
\texttt{plotParallel.setJson(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{jsonFile}	& path to json file which stores tree information.\\
\hline
\end{tabular}
\\
The json file can be used to speed up the counting of events in root trees, as it contains the tree names and contents.\\
Default value is \texttt{''}.


\subsubsection{\textit{setDataBases}}
\texttt{plotParallel.setDataBases(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{dataBases}	& list of lists of information about MEM databases.\\
\hline
\end{tabular}
\\
Each list of information about a database contains its name, path and flag to skip non existing events.\\
The databases are included when writing the cpp program.\\
Default value is \texttt{[]}.


\subsubsection{\textit{setAddInterfaces}}
\texttt{plotParallel.setAddInterfaces(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{interfaces}	& list of paths to DNNInterfaces \\
\hline
\end{tabular}
\\
Loads the interfaces as module and stores them in \texttt{addInterfaces}.\\
Default value is \texttt{[]}.


\subsubsection{\textit{setCatNames}}
\texttt{plotParallel.setCatNames(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{categoryNames}	&	list of category names.\\
\hline
\end{tabular}
\\
Default value is \texttt{['']}.


\subsubsection{\textit{setCatSelections}}
\texttt{plotParallel.setCatSelections(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{categorySelections}	&	list of category selections.\\
\hline
\end{tabular}
\\
Default value is \texttt{[1.]}.

\subsubsection{\textit{setMaxEvts}}
\texttt{plotParallel.setMaxEvts(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{maxevts}	&	number of maximum events per file.\\
\hline
\end{tabular}
\\
Default value is \texttt{50000000.}


\subsubsection{\textit{run}}
\texttt{plotParallel.run()}\\
Environment for setting up the cpp program with \texttt{scriptWriter} (see \ref{scriptWriter}), creating the rename script (see \ref{renamescript}), writing run scripts (see \ref{runscripts}) and executing the run scripts (i.e. the cpp program) on the NAF batch system (see \ref{plotSubmit}).\\
Upon termination of the run script this function also calls the \texttt{haddParallelInterface} function to start adding the created histograms together.\\
With the previously defined flag \texttt{analysisClass.skipPlotParallel} the submission of the run scripts to the NAF batch system can be skipped if the output files of \texttt{plotParallel} are already present in the working directory.\\
With the previously defined flag \texttt{analysisClass.useOldRoot} the execution of \texttt{plotParallel} can be skipped, if an output root file already exists in the working directory. This option does not check the content of the root file, it only checks its existence.\\
With the previously defined flag \texttt{analysisClass.stopAfterCompile} the script can be stopped after successfully compiling the cpp program.\\
~\\
The following functions are called during the execution of \texttt{plotParallel.run()}:
\begin{itemize}
\itemsep-0.5em
\item \texttt{scriptWriter.scriptWriter}
\item \texttt{scriptWriter.writeCC}
\item \texttt{scriptWriter.writeRenameScript}
\item \texttt{scriptWriter.writeRunScripts}
\item \texttt{nafInterface.plotInterface}
\item \texttt{plotParallel.haddParallelInterface}
\end{itemize}


\subsubsection{\textit{haddParallelInterface}}
\texttt{plotParallel.haddParallelInterface(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{writer}	& instance of \texttt{scriptWriter} class.\\
\hline
\end{tabular}
\\
Initializes a \texttt{haddParallel.haddParallel} class and runs it (see \ref{haddParallel}). 


\subsubsection{\textit{checkTermination}}
\texttt{plotParallel.checkTermination()}\\
Checks if the \texttt{plotParallel.finished} flag is set to True, which should happen, if the process has finished successfully.

\subsubsection{\textit{checkHaddFiles}}
\texttt{plotParallel.checkHaddFiles()}\\
Checks, if the list \texttt{plotParallel.haddFiles} exists and returns True/False.

\subsubsection{\textit{setRenameInput}}
\texttt{plotParallel.setRenameInput()}\\
Attempts to set an input for the \texttt{renameHistos} step. If \texttt{plotParallel.run} was executed the usual way, it produces a number of hadd files (\texttt{plotParallel.haddFiles}) which should be used as the input for \texttt{renameHistos}.\\
If \texttt{plotParallel.run} was not executed as usual the \texttt{analysisClass.limitPath} ROOT file should be used as an input.


\subsubsection{\textit{addData}}
\texttt{plotParallel.addData(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{samples}	&	list of samples for which to add data.\\
\hline
\end{tabular}
\\
Takes the nick names of all input samples and loops over the binlabels specified with \texttt{configData.binlabels} to add data to the histograms. (?)


\subsection{class \texttt{haddParallel.haddParallel}}
\label{haddParallel}
The \texttt{haddParallel} class handles the output ROOT files from \texttt{plotParallel} and adds the histograms of the ROOT files together. As this has to be done many times it is usually performed via the NAF HTC batch system as jobs executed in parallel.

\subsubsection{\textit{init}}
\texttt{haddParallel.haddParallel(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{plotParallelClass}	& instance of a \texttt{plotParallel} class to inherit member functions.\\
\hline
\end{tabular}
\\
Initializes the \texttt{haddParallel} class and inherits the needed functions from \texttt{plotParallel}.

\subsubsection{\textit{run}}
\texttt{haddParallel.run(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{writer}	& instance of a \texttt{scriptWriter} class to write needed scripts.\\
\hline
\end{tabular}
\\
Writes the python script \texttt{<workdir>/haddScript.py} with \texttt{scriptWriter.writeHaddScript}, which is later executed by jobs on the NAF batch system.\\
Loops over the \texttt{plotParallel.samplewiseMaps} created during \texttt{scriptWriter.writeRunScripts} and writes shell scripts with the \texttt{scriptWriter.writeHaddShell} function for all the samples.\\
Afterwards, it executes the scripts on the NAF batch system with \texttt{nafInterface.haddInterface} (see \ref{nafInterface}).\\
If the \texttt{analysisClass.skipHaddParallel} option was activated, the writing of scripts is skipped and only \texttt{nafInterface.haddTerminationCheck} is run, to check for the termination of all jobs. If not all jobs have terminated, the \texttt{run} function is called iteratively.\\
If the \texttt{analysisClass.haddParallel} option is deactivated, non parallel hadding is performed, but this option is not used anymore. 


\subsubsection{\textit{haddSplitter}}
\texttt{haddParallel.haddSplitter(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{input}				&	input ROOT files, either as string with wildcards or as list.\\
\texttt{outName}			&	path to desired output ROOT file.\\
\texttt{subName}			&	string of naming scheme for bookkeeping when splitting the files.\\
\texttt{nHistosRemainSame}	&	flag to decide whether number of histograms need to stay the same.\\
\texttt{skipHadd}			&	flag to enable skipping the hadding process.\\
\texttt{forceHadd}			&	flag to enable force hadding, thereby overwriting existing histograms.\\
\hline
\end{tabular}
\\
This script is actually not a part of the \texttt{haddParallel} class but rather a standalone hadding function.\\
It takes a list of ROOT files (either as a list or with a wildcard path) and tries to add all histograms in those files together to form one single output ROOT file.\\
As the amount of files can be very large sometimes, it adds the input ROOT files in bulks, instead of adding all together at once. For every bulk of ROOT files it calls the function \texttt{callHadd}, which executes the \texttt{hadd} command for the given files. At the end, all the part-files are combined to the desired output.\\
If the flag \texttt{skipHadd} is activated it does not perform the hadding and only compares the amount of histograms before and after the hadding process and decides whether to redo the hadding process or proceed with the next step depending on \texttt{nHistosRemainSame}.


\subsection{\texttt{renameHistos.py}}

\subsubsection{\textit{renameHistosParallel}}
\texttt{renameHistos.renameHistosParallel(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{inFile}			& ROOT file created by cpp program execution.\\	
\texttt{outFile}		& ROOT file that is supposed to be created.\\
\texttt{systNames}		& list of names of systematics considered.\\
\texttt{checkBins}		& option to activate checking bins (default \texttt{False}).\\
\texttt{prune}			& ? (default \texttt{False}).\\
\texttt{plotParaCall}	& indicator, whether the call of the function happened during \texttt{plotParallel}\\
						& (default \texttt{False}).\\
\texttt{Epsilon}		& ? (default \texttt{0.0}).\\
\hline
\end{tabular}
\\
Copies the \texttt{inFile} to \texttt{outFile} and loops over every key in the ROOT file.\\
Counts the number of systematics per key, renames the key, and removes histograms which have more than two systematics.\\
If the script is not called via \texttt{plotParallel} the bins are checked and adjusted.


\subsubsection{\textit{renameHistos}}
\texttt{renameHistos.renameHistos(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{inFiles}		&	list of input ROOT files, usually determined by \texttt{plotParallel.setRenameInput()}.\\
\texttt{outFile}		&	output ROOT file, usually determined by \texttt{analysisClass.limitPath}.\\
\texttt{systNames}		&	names of systematic uncertainties, usually determined by \texttt{configData.allSystNames}.\\
\texttt{checkBins}		&	option to activate checking bins (default \texttt{False}).\\
\texttt{prune}			&	? (default \texttt{False}).\\
\texttt{Epsilon}		&	? (default \texttt{0.0})\\
\texttt{skipRenaming}	&	option to skip the renaming process (default \texttt{False}).\\
\hline
\end{tabular}
\\
If the function is called with a single file as input it directly calls \texttt{renameHistosParallel}.\\
Otherwise, it writes a rename script with the \texttt{writeRenameScript} function and creates shell scripts for each input file which calls the created rename script. 
Then all the scripts are submitted to the NAF batch system via \texttt{nafInterface.renameInterface}.
Upon successful termination of these jobs, the function \texttt{haddSplitter} is called to hadd all the renamed ROOT files together to create the output ROOT file.\\
If the \texttt{analysisClass.skipRenaming} option is activated, it does not write the shell scripts and directly checks the already existing output via \texttt{nafInterface.renameTerminationCheck}. If all the scripts have terminated successfully, the submission is skipped, otherwise, the function is called iteratively (WIP).


\subsubsection{\textit{writeRenameScript}}
\texttt{renameHistos.writeRenameScript(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{outFile}	 &	name of output ROOT file of \texttt{renameHistos} function.\\
\texttt{skipRenaming}&  option to activate skipping the writing of the script.\\
\hline
\end{tabular}
\\
Writes a python script that calls the \texttt{renameHistos.renameHistosParallel} function for the jobs created in the \texttt{renameHistos} function.


\subsection{class \texttt{optBinning.optimizeBinning}}

\subsubsection{\textit{init}}
\texttt{optBinning.optimizeBinning(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{infname}			&	input ROOT file.\\
\texttt{signalsamples}		&	list of samples declared as signal samples, this depends on the signal process\\
							&	(\texttt{analysisConfig.signalProcess}).\\
\texttt{backgroundsamples}	&	list of samples declared as background samples, this depends on the signal process\\
							&	(\texttt{analysisConfig.signalProcess}).\\
\texttt{additionalSamples}	&	list of additional samples also considered during the rebinning.\\
\texttt{plots}				&	list of discriminator plots.\\
\texttt{systnames}			&	list of considered systematics (e.g. \texttt{configData.allSystNames}).\\
\texttt{minBkgPerBin}		&	minimal number of events per bin.\\
\texttt{optMode}			&	type of optimization, determined by \texttt{analysisClass.optimizedRebinning}.\\
\texttt{considerStatUnc}	&	flag to decide whether to also consider statistical uncertainties.\\
\texttt{maxBins}			&	maximum number of bins per histogram.\\
\texttt{minBins}			&	minimal number of bins per histogram.\\
\hline
\end{tabular}
\\
Loops over all discriminator plots and all samples and adds them to lists for signal and background clones. Then calls the function \texttt{getOptimizedBinEdges} per discriminator plot to get the optimized bin edges. \\
Next it loops over all samples and systematics and rebins all the histograms.


\subsubsection{\textit{getOptimizedBinEdges}}
\texttt{optimizeBinning.getOptimizedBinEdges(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{signalHisto}	&	clone of the signal histogram.\\
\texttt{bkgHisto}		&	clone of the background histogram.\\
\texttt{optMode}		&	type of optimization for the rebinning process.\\
\texttt{minBkgPerBin}	&	minimal number of events per bin.\\
\texttt{maxBins}		&	maximum number of bins per histogram.\\
\texttt{minBins}		&	minimal number of bins per histogram.\\
\texttt{considerStatUnc}&	flag to decide whether to also consider statistical uncertainties.\\
\hline
\end{tabular}
Depending on the optimization mode this function searches for the optimal bin edges by analyzing the input histograms and returns the bin edges as a list.


\subsection{\texttt{makeDatacards.py}}

\subsubsection{\textit{makeDatacardsParallel}}
\texttt{makeDatacards.makeDatacardsParallel(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{filePath}		&	path to input ROOT file to make the datacards, usually \texttt{analysisClass.limitPath}.\\
\texttt{outPath}		&	path to output folder for datacards, usually \texttt{<workdir>/datacards/}.\\
\texttt{categories}		&	list of categories to consider, usually \texttt{configData.binlabels}.\\
\texttt{doHdecay}		&	option to consider Higgs decays (default \texttt{True}).\\
\texttt{discrname}		&	name of discriminator (default \texttt{finaldiscr}).\\
\texttt{datacardmaker}	&	name of datacard maker (default \texttt{mk\_datacard\_hdecay13TeVPara}).\\
\texttt{skipDatacards}	&	option to skip making the datacards.\\
\hline
\end{tabular}
\\
This function writes a shell script for each category which calls the actual datacard maker and submits them to the NAF batch system via \texttt{nafInterface.datacardInterface}.\\
After termination it adds the bin-by-bin ROOT files, created during datacard making to the input file with \texttt{haddBinByBinFiles}.\\
If the option \texttt{analysis.skipDatacards} was activated, it skips the creation of the shell scripts and NAF submission and directly checks if the datacards already exist with \texttt{nafInterface.datacardTerminationCheck}. If all datacards exist, the submission is skipped, otherwise, the function is called iteratively.


\subsubsection{\textit{haddBinByBinFiles}}
\texttt{makeDatacards.haddBinByBinFiles(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{bbbFiles}		&	list of bin-by-bin ROOT files created during \texttt{makeDatacardsParallel}.\\
\texttt{filePath}		&	path to ROOT file which was used to create the datacards.\\
\hline
\end{tabular}
\\
Moves the bin-by-bin files to a separate folder \texttt{<workdir>/binbybinfiles/} and adds the histograms in these files to the main ROOT file via hadd.


\subsection{\texttt{drawParallel.py}}
This function is used to manage drawing the actual histograms to pdf/etc files. This is done in parallel via submission of multiple jobs to the batch system. The scripts created re-call the top-level script but with a specific plot number, thereby skipping to the \texttt{genPlots} dependent functions.

\subsubsection{\textit{drawParallel}}
\texttt{drawParallel.drawParallel(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{ListOfPlots}	&	list of discriminator plots, usually \texttt{configData.discriminatorPlots}.\\
\texttt{workdir}		&	path to working directory, usually \texttt{analysisClass.workdir}.\\
\texttt{PathToSelf}		&	path to top-level script.\\
\texttt{opts}			&	analysis options, usually \texttt{analysisClass.opts}\\
\hline
\end{tabular}
\\
Creates shell scripts with \texttt{createDrawScripts} for each discriminator plot, which are submitted to the batch system via \texttt{nafInterface.drawInterface}.


\subsubsection{\textit{createDrawScripts}}
\texttt{drawParallel.createDrawScripts(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{iPlot}		&	number of discriminator plot to determine the \texttt{analysisClass.plotNumber}.\\
\texttt{Plot}		&	discriminator plot.\\
\texttt{PathToSelf}	&	path to top-level script.\\
\texttt{scriptPath}	&	path to shell script to be written.\\
\texttt{opts}		&	analysis options, usually \texttt{analysisClass.opts}\\
\hline
\end{tabular}
\\
Writes the shell script to re-execute the top-level script for the specific discriminator plot.

\subsection{class \texttt{genPlots.genPlots}}

\subsubsection{\textit{init}}
\texttt{genPlots.genPlots(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{outPath}	&	path to input ROOT file, usually \texttt{analysisConfig.limitPath}.\\
\texttt{plots}		&	list of discriminator plots, determined usually by \texttt{configData.getDiscriminatorPlots}.\\
\texttt{plotdir}	&	output path of created plots, usually determined by \texttt{analysisClass.getPlotPath}.\\
\texttt{rebin}		&	option to perform rebinning (default \texttt{-1}).\\
\hline
\end{tabular}
\\
Inherits the given arguments and creates empty dictionaries \texttt{lists}, \texttt{samples} and \texttt{nestedhistLists}, which are needed during the later steps.

\subsubsection{\textit{genList}}
\texttt{genPlots.genList(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{samples}	&	list of samples for which the list class should be created.\\
\texttt{listName}	&	name for the class.\\
\texttt{catNames}	&	list of category names (default \texttt{['']}).\\
\texttt{doTwoDim}	&	option for two dimensional histograms \texttt{ROOT.TH2}.\\
\hline
\end{tabular}
\\
Initiates a \texttt{genPlots.List} class instance which contains lists of histograms in different configurations, depending on the included samples and keys of the ROOT file.

\subsubsection{\textit{makeSimpleControlPlots}}
\texttt{genPlots.makeSimpleControlPlots(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{dataConfig}	&	\texttt{genPlots.Config} class instance for configuration of used data.\\
\texttt{options}	&	dictionary of plotting options (see \ref{plotoptionstable}).\\
\hline
\end{tabular}
\\
Loops over the histograms specified in \texttt{dataConfig} and proceeds to draw simple control histograms.

\subsubsection{\textit{makeSimpleShapePlots}}
\texttt{genPlots.makeSimpleShapePlots(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{dataConfig}	&	\texttt{genPlots.Config} class instance for configuration of used data.\\
\texttt{options}	&	dictionary of plotting options (see \ref{plotoptionstable}).\\
\hline
\end{tabular}
\\
Loops over the histograms specified in \texttt{dataConfig} and proceeds to draw simple shape comparison histograms.


\begin{table}
\caption{\textbf{options for simple plots.}}\label{plotoptionstable}
\begin{tabular}{r|ll}
option					& default			& 	\\
\hline
\texttt{factor}			& \texttt{-1}		&  	\\
\texttt{logscale}		& \texttt{False}	&	plot with logarithmic y-scale.\\
\texttt{canvasOptions}	& \texttt{'histo'}	&	???\\
\texttt{normalize}		& \texttt{False}	&	normalize y-scale.\\
\texttt{stack}			& \texttt{False}	&	stack histograms.\\
\texttt{ratio}			& \texttt{False}	&	add ratio plot.\\
\texttt{doProfile}		& \texttt{False}	&	???\\
\texttt{statTest}		& \texttt{False}	&	perform Kolmogorov and $\chi^2$ tests.\\
\texttt{sepaTest}		& \texttt{False}	&	calculate ROC-AUC value.\\
\texttt{blinded}		& \texttt{True}		&	blind signal region.\\

\end{tabular}
\end{table}


\subsubsection{\textit{genNestedHistList}}
\texttt{genPlots.genNestedHistList(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{dataConfig}		& \texttt{genPlots.Config} class instance for configuration of used data.\\
\texttt{systNames}		& list of names of systematic uncertainties to be considered when making control plots.\\
\texttt{outName}		& name of created object.\\
\hline
\end{tabular}
\\
Prepares a nested list of histograms for use in \texttt{makeControlPlots}.\\ 
For every list created an entry should be added to a \texttt{nestedHistsConfig} containing the desired configurations of the plots.


\subsubsection{\textit{makeControlPlots}}
\texttt{genPlots.makeControlPlots(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{dataConfig}			&	\texttt{genPlots.Config} class instance for configuration of data samples.\\
\texttt{controlConfig}		&	\texttt{genPlots.Config} class instance for configuration of control plots.\\
\texttt{sampleConfig}		&	\texttt{genPlots.Config} class instance for configuration of samples.\\
\texttt{headHist}			&   list which contains the first histogram.\\
\texttt{headSample}			&	list which contains the first sample.\\
\texttt{nestedHistsConfig}	&	config for nested histogram list.\\
\texttt{options}			&	dictionary of plotting options (see \ref{plotoptionstable}).\\
\hline
\end{tabular}
\\
Creates super duper plots. TODO.


\subsubsection{\textit{makeEventYields}}
\texttt{genPlots.makeEventYields(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{categories}			&	list of categories for which event yields should be calculated.\\
\texttt{listName}			&	list which contains the data of histograms.\\
\texttt{dataName}			&	list which contains some other data?.\\
\texttt{nameRequirement}	&	name requirement for histograms.\\
\hline
\end{tabular}
\\
TODO.


\newpage

\section{Tools}

\subsection{class \texttt{plotClasses.Plot}}
\label{plotclasses_plot}

\subsubsection{\textit{init}}
\texttt{plotClasses.Plot(\args, variable = "", selection = "", label = "")}\\
\begin{tabular}{r|l}
\hline
\texttt{histo}		&	\texttt{ROOT.TH1}-type instance which contains the plot.\\
\texttt{variable}	&	name of \texttt{discr}.\\
					& 	if no argument is given, \texttt{histo.GetName()} is chosen as \texttt{variable}.\\
\texttt{selection}	&	name of \texttt{plotPreselection}.\\
\texttt{label}		& 	name of \texttt{binlabel}.\\
\hline
\end{tabular}
\\
The variables \texttt{discr}, \texttt{plotPreselection} and \texttt{binlabel} are usually defined in the \texttt{<pyrootdir>/configs/<basename>\_plots} files (see \ref{configs_plots}).


\subsection{class \texttt{plotClasses.Sample}}

\subsubsection{\textit{init}}
\texttt{plotClasses.Sample(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{name}			&	name of sample. \\
\texttt{color}			&	plotting color (default \texttt{ROOT.kBlack}).\\
\texttt{path}			&	path to samples, supports wildcards (default \texttt{''}).\\
\texttt{selection}		&	selection weight (default \texttt{''}).\\
\texttt{nick}			&	nick of sample (default \texttt{''}). \\
\texttt{listOfShapes}	&	list of shape samples (default \texttt{[]}).\\
\texttt{up}				&	? (default \texttt{0}).\\
\texttt{down}			&	? (default \texttt{None}).\\
\texttt{samDict}		&	\texttt{plotClasses.SampleDictionary} instance (default \texttt{''}).\\
\texttt{readTrees}		&	allows globbing samples from different paths (default \texttt{False}).\\
\texttt{filterFile}		&	file with filter information (default \texttt{'NONE'}).\\
\texttt{checknevents}	&	? (default \texttt{-1}).\\
\texttt{treename}		&	name of tree for ROOT file (default \texttt{'MVATree'}).\\
\hline
\end{tabular}
\\
Saves the given information as member variables. If the \texttt{readTrees} option is activated, it also searches for samples in different paths.


\subsection{\texttt{PDFutils.py}}
\label{pdfutils}
\subsubsection{\textit{ GetMEPDFadditionalVariablesList }}
\texttt{GetMEPDFadditionalVariablesList(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{csvfile}	& path to csv-type file \\
					& containing names, weights and factors for matrix element pdf variables.\\
\hline
\end{tabular}
\\
Reads csv file with \texttt{ReadMEandPDFNormalizations} function, scans dictionary for double entries and returns list of extracted weight variables.


\subsection{class \texttt{scriptWriter.scriptWriter}}

\subsubsection{\textit{init}}
\texttt{scriptWriter.scriptWriter(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{plotParaClass} 	& instance of \texttt{plotParallel} class, which is inherited.\\
\hline
\end{tabular}
\\
Inherits \texttt{plotParallel} class.


\subsubsection{\textit{writeCC}}
\label{scriptWriter}
\texttt{scriptWriter.writeCC()}\\
Main function for writing and compiling the cpp program. Calls the functions \texttt{createProgram} and \texttt{compileProgram}.
If an old version of the cpp program is already present in the working directory, this function also checks whether the newly created program differs from the old one.


\subsubsection{\textit{createProgram}}
\texttt{scriptWriter.createProgram()}\\
Main function for writing the cpp program.\\
First, generates a veto list from the \texttt{<pyrootdir>/data/vetolist.csv} file of variables that should not be written to the program automatically. This also considers a veto list for LHEWeights which is created from the \texttt{plotParallel.MEPDFCSVFile}, a veto list for DNNInterface variables defined in \texttt{plotParallel.addInterfaces} and a veto list for data bases which is created from the \texttt{plotParallel.dataBases}.\\
For the cpp file, the variables in \texttt{configData.allSamples} need to be initialized, which is done with the \texttt{variablebox.Variables} class (see \ref{varbox-variables}). For this purpose a root tree is chosen to perform variable checks.\\
~\\
With the variable information, multiple functions of \texttt{scriptfunctions.py} (see \ref{scriptfunctions}) and of other modules are called:
\begin{itemize}
\itemsep-0.5em
\item \texttt{scriptfunctions.getHead}
\item \texttt{scriptfunctions.DeclareMEPFNormFactors}
\item \texttt{scriptfunctions.ADDMEandPDFNormalizationsMap}
\item \texttt{scriptfunctions.InitDataBase}
\item \texttt{DNNInterface.getBeforeLoopLines}
\item \texttt{variablebox.Variables.initVarsProgram}
\item \texttt{variablebox.Variables.initBranchAdressesProgram}
\item \texttt{variablebox.Variables.setupTMVAReadersProgram}
\item \texttt{scriptfunctions.initHistos}
\item \texttt{scriptfunctions.startLoop}
\item \texttt{scriptfunctions.initMEPDF.writeCode}
\item \texttt{DNNInterface.getVariableInitInsideEventLoopLines}
\item \texttt{scriptfunctions.encodeSampleSelection}
\item \texttt{scriptfunctions.readOutDataBase}
\item \texttt{DNNInterface.getEventLoopCodeLines}
\item \texttt{variablebox.Variables.calculateVarsProgram}
\item \texttt{scriptfunctions.initPlots.startCat}
\item \texttt{scriptfunctions.initPlots.initPlot}
\item \texttt{scriptfunctions.initPlots.endCat}
\item \texttt{scriptfunctions.endLoop}
\item \texttt{DNNInterface.getTestCallLines}
\item \texttt{scriptfunctions.getFoot}
\end{itemize}
After successfully writing the cpp program it is saved to \texttt{<workdir>/<workdirname>.cc} and compiled with \texttt{compileProgram}.


\subsubsection{\textit{compileProgram}}
\texttt{scriptWriter.compileProgram()}\\
This function generates a compile command, depending on \texttt{plotParallel.addInterfaces} and \texttt{plotParallel.dataBases}, and tries to execute it on the program created in \texttt{createProgram}. The script aborts if the compilation was not successful.


\subsubsection{\textit{writeRenameScript}}
\label{renamescript}
\texttt{scriptWriter.writeRenameScript()}\\
This function writes a short python script at \texttt{<workdir>/<workdirname>\_rename.py} which calls the function \texttt{renameHistos.renameHistosParallel} upon execution.


\subsubsection{\textit{writeRunScripts}}
\label{runscripts}
\texttt{scriptWriter.writeRunScripts()}\\
This function handles the writing of scripts to execute the cpp program. It creates lists for \texttt{scripts}, \texttt{output}, \texttt{nentries} and a dictionary \texttt{samplewiseMaps} for bookkeeping purposes.\\
It loops over all samples in \texttt{configData.allSamples} and over all files in those samples, counting the events in each file (writing it to \texttt{nentries}) and calling the function \texttt{writeSingleScript} to write the shell script.\\
If the number of events in the file exceeds the limit determined by \texttt{plotParallel.maxevents}, the file is split into parts and separate shell scripts are written for each part.\\
If the \texttt{analysisClass.cirun} option is activated, only the first file per sample is considered, thereby reducing the number of jobs and events drastically. This is only for testing purposes of the framework, the results will not be very meaningful.\\
This function returns the bookkeeping lists as a dictionary, to be used by the NAF submit interfaces.

\subsubsection{\textit{writeSingleScript}}
\texttt{scriptWriter.writeSingleScript(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{sample}			& current sample for which a script is written.\\
\texttt{filenames}		& current file in sample for which a script is written.\\
\texttt{nJob}			& counter of jobs to determine name of written shell file.\\
\texttt{filterFile}		& filterFile of sample.\\
\texttt{writeOptions}	& dictionary of options for writing the shell script\\
						& includes for example \texttt{'skipEvents'} for large samples.\\
\hline
\end{tabular}
\\
This function writes a shell script to execute the previously created cpp program. In the shell script various environment variables are exported for use in the cpp program.\\
After calling the cpp program the shell script also calls the rename script created in \texttt{writeRenameScript}.\\
This function also adds the name of the shell script and the name of the output root file to the bookkeeping lists \texttt{scripts} and \texttt{outputs}, which were created in \texttt{writeRunScripts}.


\subsubsection{\textit{writeHaddScript}}
\texttt{scriptWriter.writeHaddScript()}\\
Writes a python script to \texttt{<workdir>/haddScript.py} which takes multiple arguments:
\begin{itemize}
\itemsep-0.5em
\item desired output ROOT file name
\item desired location of log file
\item ROOT files to be added together
\end{itemize}
All arguments except the first two specify the ROOT files whose histograms are added together via the bash command \texttt{hadd} designed for ROOT.\\
The script writes either \texttt{OK} or \texttt{ERROR} into the specified log file, such that the success of the hadd process can be judged by the \texttt{nafInterface.haddTerminationCheck} function.


\subsubsection{\textit{writeHaddShell}}
\texttt{scriptWriter.writeHaddShell(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{scriptname}		& name of shell script to be written.\\
\texttt{haddedRootName}	& desired output ROOT file name.\\
\texttt{haddedLogName}	& desired log file name.\\
\texttt{sampleData}		& list of ROOT files to be added together.\\
\hline
\end{tabular}
\\
Writes a shell script which is supposed to be submitted to the NAF batch system.\\
The script calls the \texttt{<workdir>/haddScript.py} script which was created with \texttt{scriptWriter.writeHaddScript}.

\subsection{class \texttt{variablebox.Variables}}
\label{varbox-variables}

\subsubsection{\textit{init}}
\texttt{variablebox.Variables(veto = [])}\\
\begin{tabular}{r|l}
\hline
\texttt{veto}	& previously generated vetolist of plots that should not be considered by default.\\
\hline
\end{tabular}
\\
Initializes a dictionary for variables and inherits the vetolist.

\subsubsection{\textit{initVars}}
\texttt{variablebox.Variables.initVars(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{expr}	& list or string of expressions for creating variables.\\
\texttt{tree}	& ROOT tree to extract variables\\
\hline
\end{tabular}
\\
If \texttt{expr} is a list, the function is called iteratively per list element.\\
Depending on the structure of the expression, the single variables are initialized with the \texttt{initSingleVar} function, where a variable is added to the \texttt{variables} dictionary created during \textit{init} as a \texttt{variablebox.Variable} instance (see \ref{varbox-var}).\\
Only variables that are neither in the vetolist nor already in the variable dictionary are added to the dictionary.

\subsubsection{\textit{initVarsProgram}}
\texttt{variablebox.Variables.initVarsProgram()}\\
Loops over all variables in \texttt{variables} dictionary and writes output of \texttt{variablebox.Variable.initVarProgram} to cpp file.

\subsubsection{\textit{initBranchAddressesProgram}}
\texttt{variablebox.Variables.initBranchAdressesProgram()}\\
Loops over all variables in \texttt{variables} dictionary and writes output of \texttt{variablebox.Variable.initBranchAdressProgram} to cpp file.

\subsubsection{\textit{setupTMVAReadersProgram}}
\texttt{variablebox.Variables.setupTMVAReadersProgram()}\\
Loops over all variables in \texttt{variables} dictionary and writes output of \texttt{variablebox.Variable.setupTMVAReaderProgram} to cpp file.

\subsubsection{\textit{calculateVarsProgram}}
\texttt{variablebox.Variables.calculateVarsProgram()}\\
Sorts the variable dictionary such that dependencies can be calculated in the proper order, creates a list of conditions for the variables, writes code for each variable considering these conditions and also writes the output of \texttt{variablebox.Variable.calculateVarProgram} to the cpp file.


\subsection{class \texttt{variablebox.Variable}}
\label{varbox-var}
\subsubsection{\textit{init}}
\texttt{variablebox.Variable(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{name}			& name of variable\\
\texttt{expression}		& extracted expression for variable \\
						& if none is given it also takes \texttt{name} as input\\
\texttt{vartype}		& type of variable as string, e.g. \texttt{'F'} for float\\
\texttt{arraylength}	& flag for array variables (default \texttt{None})\\
\hline
\end{tabular}
\\
Saves input information as member variables.

\subsubsection{\textit{initVarProgram}}
\texttt{variablebox.Variable.initVarProgram()}\\
Writes code for variable depending on \texttt{vartype} and \texttt{arraylength} to cpp file.

\subsubsection{\textit{initBranchAdressProgram}}
\texttt{variablebox.Variable.initBranchAdressProgram()}\\
Writes code to set \texttt{ROOT.TChain} branch addresses to cpp file.

\subsubsection{\textit{setupTMVAReaderProgram}}
\texttt{variablebox.Variable.setupTMVAReaderProgram(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{variables}	& list of variables.\\
\hline
\end{tabular}
\\
Writes code from \texttt{variablebox.Variable.initReaderProgram}, \texttt{variablebox.Variable.addVariablesToReaderProgram} and \texttt{variablebox.Variable.bookMVAProgram} to cpp file.\\
Disclaimer: this is untested and might not work.


\subsection{\texttt{scriptfunctions.py}}
\label{scriptfunctions}
TODO


\subsection{\texttt{nafInterface.py}}
This file contains one function \texttt{<usecase>Interface} for each use case, where the NAF batch system is called, which handles the submission to the batch system and performs checks on the output with the \texttt{<usecase>TerminationCheck} functions.\\
The use cases implemented are:
\begin{itemize}
\itemsep-0.5em
\item \texttt{plotParallel}
\item \texttt{haddParallel}
\item \texttt{renameHistos}
\item \texttt{makeDatacards}
\item \texttt{drawParallel}
\end{itemize}


\subsubsection{\textit{plotInterface}}
\label{plotSubmit}
\texttt{nafInterface.plotInterface(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobData}			&	dictionary containing \texttt{scripts}, \texttt{outputs}, \texttt{entries} and \texttt{maps},\\
							&	created in \texttt{scriptWriter.writeRunScripts}.\\
\texttt{skipPlotParallel}	&	option to skip the submission of scripts to the batch system.\\
							&	(default \texttt{False}).\\
\texttt{maxTries}			&	maximum number of resubmits after failing (default \texttt{10}).\\
\texttt{nTries}				&	counter for current try (default \texttt{0}).\\
\hline
\end{tabular}
\\
The functions \texttt{nafSubmit.submitArrayToNAF} or \texttt{nafSubmit.submitToNAF} are called, depending on \texttt{nTries}.\\
After submission of the scripts with the \texttt{nafSubmit} functions, the job status is monitored with \texttt{nafSubmit.monitorJobStatus}. \\
Upon termination of all jobs, the output is checked with \texttt{plotTerminationCheck}. Scripts that did not pass the check are resubmitted to the batch system, by iteratively calling the \texttt{plotInterface} function.\\
If the \texttt{nTries} counter exceeds the \texttt{maxTries} threshold, the program is terminated.\\
If the \texttt{skipPlotParallel} option was activated, it skips directly to \texttt{plotTerminationCheck}, to check whether the \texttt{plotParallel} output is complete.\\


\subsubsection{\textit{plotTerminationCheck}}
\texttt{nafInterface.plotTerminationCheck(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobData}		&	dictionary containing \texttt{scripts}, \texttt{outputs}, \texttt{entries} and \texttt{maps},\\
						&	created in \texttt{scriptWriter.writeRunScripts}.\\
\hline
\end{tabular}
\\
Loops over the jobs in \texttt{jobData} and crosschecks the number of entries in \texttt{<output>.cutflow.txt} (which is created by the cpp program) with the number of entries in \texttt{<entries>}.\\
If the number does not match or the cutflow file does not exist, the job is added to a resubmission list, which is returned at the end of the function. 


\subsubsection{\textit{haddInterface}}
\texttt{nafInterface.haddInterface(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobsToSubmit}		&	list of shell scripts to be submitted to the batch system.\\
\texttt{outfilesFromSubmit}	&	list of output ROOT files.\\
\texttt{maxTries}			&	maximum number of resubmits after failing (default \texttt{10}).\\
\texttt{nTries}				&	counter for current try (default \texttt{0}).\\
\hline
\end{tabular}
\\
The functions \texttt{nafSubmit.submitArrayToNAF} or \texttt{nafSubmit.submitToNAF} are called, depending on \texttt{nTries}.\\
After submission of the scripts with the \texttt{nafSubmit} functions, the job status is monitored with \texttt{nafSubmit.monitorJobStatus}.\\
Upon termination of all jobs, the output is checked with \texttt{haddTerminationCheck}. Scripts that did not pass the check are resubmitted to the batch system, by iteratively calling the \texttt{haddInterface} function.\\
If the \texttt{nTries} counter exceeds the \texttt{maxTries} threshold, the program is terminated.\\

\subsubsection{\textit{haddTerminationCheck}}
\texttt{nafInterface.haddTerminationCheck(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{outputScripts}		&	list of shell scripts that were submitted to the batch system.	\\
\texttt{outputFiles}		&	list of output ROOT files.	\\
\hline
\end{tabular}
\\
The log files which were created during the run of \texttt{<workdir>/haddScript.py} are checked. These contain either \texttt{OK} or \texttt{ERROR}, depending on the success of the hadding process.\\
If the status is not \texttt{OK} or the log file is missing, the job is added to a resubmission list, which is returned at the end of the function.


\subsubsection{\textit{renameInterface}}
\texttt{nafInterface.renameInterface(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobsToSubmit}		&	list of shell scripts to be submitted to the batch system.\\
\texttt{outfilesFromSubmit}	&	list of output ROOT files.\\
\texttt{maxTries}			&	maximum number of resubmits after failing (default \texttt{10}).\\
\texttt{nTries}				&	counter for current try (default \texttt{0}).\\
\hline
\end{tabular}
\\
The functions \texttt{nafSubmit.submitArrayToNAF} or \texttt{nafSubmit.submitToNAF} are called, depending on \texttt{nTries}.\\
After submission of the scripts with the \texttt{nafSubmit} functions, the job status is monitored with \texttt{nafSubmit.monitorJobStatus}.\\
Upon termination of all jobs, the output is checked with \texttt{renameTerminationCheck}. Scripts that did not pass the check are resubmitted to the batch system, by iteratively calling the \texttt{renameInterface} function.\\
If the \texttt{nTries} counter exceeds the \texttt{maxTries} threshold, the program is terminated.


\subsubsection{\textit{renameTerminationCheck}}
\texttt{nafInterface.renameTerminationCheck(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{shellScripts}	&	list of shell scripts that were submitted to the batch system.\\
\texttt{outputFiles}	&	list of output ROOT files.\\
\hline
\end{tabular}
\\
The ROOT files which were created during the batch jobs are checked. If they do not exist, the responsible job is added to a resubmission list, which is returned at the end of the function.


\subsubsection{\textit{datacardInterface}}
\texttt{nafInterface.datacardInterface(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobsToSubmit}	&	list of shell scripts to be submitted to the batch system.\\
\texttt{datacardFiles}	&	list of datacards to be created.\\
\texttt{maxTries}		&	maximum number of resubmits after failing (default \texttt{10}).\\
\texttt{nTries}			&	counter for current try (default \texttt{0}).\\
\hline
\end{tabular}
\\
The functions \texttt{nafSubmit.submitArrayToNAF} or \texttt{nafSubmit.submitToNAF} are called, depending on \texttt{nTries}.\\
After submission of the scripts with the \texttt{nafSubmit} functions, the job status is monitored with \texttt{nafSubmit.monitorJobStatus}.\\
Upon termination of all jobs, the output is checked with \texttt{datacardTerminationCheck}. Scripts that did not pass the check are resubmitted to the batch system, by iteratively calling the \texttt{datacardInterface} function.\\
If the \texttt{nTries} counter exceeds the \texttt{maxTries} threshold, the program is terminated.


\subsubsection{\textit{datacardTerminationCheck}}
\texttt{nafInterface.datacardTerminationCheck(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{shellScripts}	&	list of shell scripts that were submitted to the batch system.\\
\texttt{datacardFiles}	&	list of datacards that should have been created.\\
\hline
\end{tabular}
\\
The datacards which were created during the batch jobs are checked. If they do not exist, the responsible job is added to a resubmission list, which is returned at the end of the function.


\subsubsection{\textit{drawInterface}}
\texttt{nafInterface.drawInterface(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobsToSubmit}	&	list of shell scripts to be submitted to the batch system.\\
\texttt{outputPlots}	&	list of discriminator plots for which the output plots should be created.\\
\texttt{nTries}			&	counter for current try (default \texttt{0}).\\
\hline
\end{tabular}
\\
The functions \texttt{nafSubmit.submitArrayToNAF} or \texttt{nafSubmit.submitToNAF} are called, depending on \texttt{nTries}.\\
After submission of the scripts with the \texttt{nafSubmit} functions, the job status is monitored with \texttt{nafSubmit.monitorJobStatus}.\\
Upon termination of all jobs, the output is checked with \texttt{drawTerminationCheck}. Scripts that did not pass the check are resubmitted to the batch system, by iteratively calling the \texttt{drawInterface} function.


\subsubsection{\textit{drawTerminationCheck}}
\texttt{nafInterface.drawTerminationCheck(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobsToSubmit}		&	list of shell scripts that were submitted to the batch system.\\
\texttt{outputPlots}		&	list of discriminator plots for which the output plots should have been created.\\
\hline
\end{tabular}
\\
This is not implemented yet.


\subsection{\texttt{nafSubmit.py}}
The functions in this file handle the actual submission of jobs to the NAF batch system.

\subsubsection{\textit{submitToNAF}}
\texttt{nafSubmit.submitToNAF(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{scripts}		&	list of shell scripts to be submitted.\\
\texttt{holdIDs}		&	list of IDs of jobs that need to be finished before queueing the new scripts\\
						& 	(default \texttt{None}).\\
\texttt{submitOptions}	&	dictionary of submitOptions (see \ref{writeSubmitCode}).\\
\hline
\end{tabular}
\\
Calls \texttt{writeSubmitCode} to write submit code for the HTC system for every script and submits those scripts with the \texttt{condorSubmit} function.\\
If the \texttt{holdIDs} option was used, it also writes a release script with \texttt{setupRelease} which releases the other scripts to the batch system, as soon as the conditions are fulfilled.\\
It returns the list of \texttt{jobIDs} of the submitted jobs.

\subsubsection{\textit{submitArrayToNAF}}
\texttt{nafSubmit.submitArrayToNAF(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{scripts}		&	list of shell scripts to be submitted.\\
\texttt{arrayName}		&	desired name of the array file (default \texttt{''}).\\
\texttt{holdIDs}		&	list of IDs of jobs that need to be finished before queueing the new scripts\\
						& 	(default \texttt{None}).\\
\texttt{submitOptions}	&	dictionary of submitOptions (see \ref{writeSubmitCode}).\\
\hline
\end{tabular}
\\
Calls \texttt{writeArrayCode} to write the shell script that handles the submission of the scripts as an array and writes a submit code for that array script with \texttt{writeSubmitCode} and submits the script with the \texttt{condorSubmit} function.\\
If the \texttt{holdIDs} option was used, it also writes a release script with \texttt{setupRelease} which releases the other scripts to the batch system, as soon as the conditions are fulfilled.\\
It returns the list of \texttt{jobIDs} of the submitted jobs.


\subsubsection{\textit{writeArrayCode}}
\texttt{nafSubmit.writeArrayCode(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{scripts}	&	list of scripts to be concatenated as an array.\\
\texttt{arrayName}	& 	desired name of the array file.\\
\hline
\end{tabular}
\\
When submitting multiple scripts, one can use the array functionality, where instead of submitting all scripts independently, all scripts are submitted at once with an array script that manages the submit with a task ID which is being iterated.\\
This function writes the array script for this purpose and returns the path of the file.


\subsubsection{\textit{writeSubmitCode}}
\label{writeSubmitCode}
\texttt{nafSubmit.writeSubmitCode(\args, \kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{script}		&	path to shell script for which the code is written.\\
\texttt{logdir}		&	path to directory for logfiles.\\
\texttt{hold}		&	option to initiate job in hold state (default \texttt{False}).\\
\texttt{isArray}	&	indicator whether the shell script is for an array job (default \texttt{False}).\\
\texttt{nScripts}	&	number of jobs, if the shell script is an array job (default \texttt{0}).\\
\texttt{options}	&	dictionary of options for submit (see table \ref{submitOptionTable}).\\
\hline
\end{tabular}
\\
Writes submit file according to the chosen options and returns path to submit file which is used by the \texttt{condorSubmit} function to submit the job.


\begin{table}
\caption{\textbf{submit options.}}\label{submitOptionTable}
\begin{tabular}{r|rl}
option					& default 			&	\\
\hline
\texttt{RequestMemory}	& \texttt{1000M}	&	requested amount of memory for slot.\\
\texttt{RequestDisk}	& \texttt{1000M}	&	requested amount of disk for slot.\\
\texttt{+RequestRuntime}& \texttt{4800}		&	requested runtime in seconds.\\
\texttt{PeriodicHold}	& \texttt{1000}		&	put job in hold status after \texttt{x} seconds.\\
\texttt{PeriodicRelease}& \texttt{5}		&	release job from hold status after \texttt{x} seconds.\\
\end{tabular}
\end{table}


\subsubsection{\textit{setupRelease}}
\texttt{nafSubmit.setupRelease(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{oldJIDs}	&	list of jobs that need to be finished before new jobs are queued.\\
\texttt{newJIDs}	&	list of jobs that wait on old jobs to be finished before being queued.\\
\hline
\end{tabular}
\\
Writes a shell script which monitors the jobs with \texttt{oldJIDs}. It releases the jobs with \texttt{newJIDs} when the old ones are finished and submits the shell script with a submit file.\\
Returns the jobID of the job.

\subsubsection{\textit{condorSubmit}}
\texttt{nafSubmit.condorSubmit(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{submitPath}	&	path to the submit file that is supposed to be submitted.\\
\hline
\end{tabular}
\\
Submits the submit file to the NAF HTC batch system and returns its jobID.

\subsubsection{\textit{monitorJobStatus}}
\texttt{nafSubmit.monitorJobStatus(\kwargs)}\\
\begin{tabular}{r|l}
\hline
\texttt{jobIDs}	&	list of jobs that are monitored (default \texttt{None}).\\
\hline
\end{tabular}
\\
Periodically checks the status of all jobs specified by \texttt{jobIDs}. This function runs until all jobs are terminated, i.e.\ are in neither hold, idle nor run state.\\
If no jobID is specified, all jobs of the user are monitored.\\
This function cannot check, whether the job has terminated successfully or terminated with an error.


\newpage
\section{Configs}

\subsection{\texttt{plots.py}}
\label{configs_plots}

\subsubsection{\textit{getDiscriminatorPlots}}
\texttt{getDiscriminatorPlots(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{data}		& initialized instance of \texttt{catData} (see \ref{catData}).\\
\texttt{discrname}	& name of the discriminator, usually defined in \texttt{analysisClass}.\\
\hline
\end{tabular}
\\
Loads configs for different types of plots via \texttt{add\_<type>} functions into \texttt{catData}. This includes:
\begin{itemize} 
\itemsep-0.5em
\item \texttt{categories:}	 category of plot
\item \texttt{discrs:}	 discriminator of plot (sometimes depends on \texttt{finaldiscr})
\item \texttt{nhistobins:}	number of bins for plot
\item \texttt{minxvals:}	minimal x-axis value for plot
\item \texttt{maxxvals:}	maximal x-axis value for plot
\end{itemize}
Afterwards, \texttt{plotPreselections} and \texttt{binlabels} are created from \texttt{categories} with the \texttt{genPlotInput} function.\\
Finally, a list of \texttt{plotClasses.Plot}-type instances (see \ref{plotclasses_plot}) is created from that data and returned.\\
\\
Alternatively, the list of \texttt{plotClasses.Plot}-type instances can also be created directly in \texttt{add\_<type>} functions, without using the \texttt{catData} class.

\subsubsection{\textit{evtYieldCategories}}
\texttt{evtYieldCategories()}\\
Returns a list of categories, which is used for calculating the event yields during the \texttt{makeEventYields} step.


\subsection{\texttt{addVariables.py}}
\label{configs_addvars}

\subsubsection{\textit{getAddVars}}
\texttt{getAddVars()}\\
Returns a hard-coded list of additional variables.


\subsection{\texttt{samples.py}}
\label{configs_samples}

\subsubsection{\textit{getSamples}}
\texttt{getSamples(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\hline
\end{tabular}
\\
Loads list of \texttt{plotClasses.Sample}-type instances which contain signal and background samples. These samples are used for generating plots with \texttt{genPlots}.


\subsubsection{\textit{getControlSamples}}
\texttt{getControlSamples(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\hline
\end{tabular}
\\
Loads list of \texttt{plotClasses.Sample}-type instances which contain control samples.\\ These samples are used for generating plots with \texttt{genPlots}.


\subsubsection{\textit{getSystSamples}}
\texttt{getSystSamples(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\texttt{analysis} & \texttt{analysisConfig} instance \\
\texttt{samples} & list of samples from \texttt{getSamples} function \\
\hline
\end{tabular}
\\
Loads list of \texttt{plotClasses.Sample}-type instances which contain systematic samples. To determine which systematics are used, the list created in \texttt{getSamples} is used together with multiple lists of systematic names in plotconfig.


\subsubsection{\textit{getAllSamples}}
\texttt{getAllSamples(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\texttt{analysis} & \texttt{analysisConfig} instance \\
\texttt{samples} & list of samples from \texttt{getSamples} function \\
\hline
\end{tabular}
\\
Concatenates the lists of \texttt{plotClasses.Sample}-type instances created with \texttt{getSamples}, \texttt{getControlSamples} and \texttt{getSystSamples}.\\
This list of samples is used to create the cpp program in \texttt{plotParallel}.


\subsubsection{\textit{getAllSystNames}}
\texttt{getAllSystNames(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\hline
\end{tabular}
\\
Loads multiple lists of systematic names from plotconfig.

\subsubsection{\textit{getOtherSystNames}}
\texttt{getOtherSystNames(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\hline
\end{tabular}
\\
Loads multiple lists of systematic names from plotconfig.


\subsubsection{\textit{getWeightSystNames}}
\texttt{getWeightSystNames(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\hline
\end{tabular}
\\
Loads list of names of weight systematics.\\
This is used for example in \texttt{plotParallel} and \texttt{renameHistos}.


\subsubsection{\textit{getSystWeights}}
\texttt{getSystWeights(\args)}\\
\begin{tabular}{r|l}
\hline
\texttt{pltcfg} & plotconfig instance, initialized in \texttt{analysisConfig}\\
\hline
\end{tabular}
\\
Loads list of systematic weights from plotconfig.\\
This is used for example in \texttt{plotParallel} and \texttt{renameHistos}.


\end{document}