diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7ffd49c..1b80065 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ jobs: with: root_file: main.tex docker_image: ghcr.io/xu-cheng/texlive-small:latest - pre_compile: "tlmgr update --self && tlmgr install wrapfig enumitem titlesec svg physics biblatex biblatex-phys tocbibind siunitx cleveref transparent graphbox" + pre_compile: "tlmgr update --self && tlmgr install wrapfig enumitem titlesec physics biblatex biblatex-phys tocbibind siunitx cleveref transparent graphbox" - uses: actions/upload-artifact@v4 with: name: my-thesis diff --git a/.gitignore b/.gitignore index 96d1038..8942e5e 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ # *.ps # *.eps # *.pdf -main.pdf + ## Generated if empty string is given at "Please type another file name for output:" .pdf !Images/** @@ -308,3 +308,5 @@ TSWLatexianTemp* # Other files .DS_Store .DS_store +main.pdf +!Portada.pdf diff --git a/Images/Satellite-view-of-LHC-tunnel-with-the-four-detectors.png b/Images/Satellite-view-of-LHC-tunnel-with-the-four-detectors.png new file mode 100644 index 0000000..b522de7 Binary files /dev/null and b/Images/Satellite-view-of-LHC-tunnel-with-the-four-detectors.png differ diff --git a/Portada.pdf b/Portada.pdf index 96865ea..9e69195 100644 Binary files a/Portada.pdf and b/Portada.pdf differ diff --git a/Title.tex b/Title.tex index c4232b0..454cd21 100644 --- a/Title.tex +++ b/Title.tex @@ -54,7 +54,7 @@ \vspace{1.0cm} - \SignatureAndDate{} + \SignatureAndDate{Heidy Sierra, Ph.D.} \\Representative, Office of Graduate Studies \vspace{1.0cm} diff --git a/chapters/Abstract.tex b/chapters/Abstract.tex index 86b1043..38847c1 100644 --- a/chapters/Abstract.tex +++ b/chapters/Abstract.tex @@ -1,8 +1,9 @@ -\chapter{Abstract} +\chapter*{Abstract} -The need for new physics has brought many exotic searches in hopes of answering the questions that the Standard Model has yet to address. 
+The Standard Model of Particle Physics (SM) has had a great track record over the decades. With the discovery of the top quark, the $\tau$ neutrino and the Higgs boson, the SM has proved its effectiveness and predictive prowess. Yet, it leaves behind open questions regarding problems like Dark Matter and thus a need for new physics. This has brought up many exotic searches in hopes of answering the questions that the SM has yet to address. To provide the necessary quality to search for new physics, physicists use the most complex machines ever designed. -The preponderance of cosmological evidence suggests that the density dark matter energy density of the Universe is around 5 times the amount of regular baryonic matter, and hence, experimental searches have been developed to explain this. The CMS Collaboration has searched for signals of a dark matter model via the Emerging Jets analysis group. +The preponderance of cosmological evidence suggests that the dark matter energy density of the Universe is around 5 times the amount of regular baryonic matter, and hence, experimental searches have been developed to explain this. +The CMS Collaboration has searched for signals of a dark matter model via the Emerging Jets analysis group. As with all experiments in High Energy physics, acquiring high quality of data is paramount to achieve groundbreaking science. The CMS experiment achieves the collection of it's high quality data through the triggering and data acquisition systems put in place, but require manual labor to certify. In this work I present trigger efficiency studies relevant to the Emerging Jets analysis. Moreover, I present my work to improve the process of data certification in the DQM workflow implemented at the CMS Tracker DQM group. This work adds the automation of a new web application called the Machine Learning playground designed to improve DQM shifter efficiency in data certification. 
@@ -15,9 +16,15 @@ \chapter{Abstract} \textit{Keywords}: [Emerging Jets, Dark Matter, Quantum Chromodynamics, Machine Learning, Data Quality Monitoring] -\chapter{Resumen} +\chapter*{Resumen} -La necesidad de nueva física ha llevado a muchas búsquedas exóticas con la esperanza de responder a las preguntas que el Modelo Estándar aún no ha logrado responder. Para proporcionar la calidad necesaria para buscar nueva física, los físicos utilizan las máquinas más complejas que han sido diseñadas. La preponderancia de evidencia cosmológica sugiere que la densidad de energía de la materia oscura en el universo es aproximadamente 5 la densidad de materia bariónica regular, y por lo tanto, se han desarrollado búsquedas experimentales para explicar esto. La Colaboración CMS ha buscado señales de un modelo de materia oscura a través del grupo de análisis de Jets Emergentes. Como en todos los experimentos en física de altas energías, adquirir datos de alta calidad es primordial para lograr ciencia innovadora. El experimento CMS logra la recopilación de sus datos de alta calidad a través de los sistemas de ``trigger'' y adquisición de datos implementados, pero requiere mucho trabajo manual para certificarlos. En este escrito, presento estudios de eficiencia de ``trigger'' relevantes para el análisis de ``Emerging Jets''. Además, presento mi trabajo para mejorar el proceso de la certificación de datos en el proceso de DQM implementado en el grupo de DQM del Tracker de CMS. Este trabajo añade la automatización de una nueva aplicación web llamada el ``Machine Learning Playground'', diseñada para mejorar la eficiencia de los trabajadores de turno de DQM en la certificación de datos. +El Modelo Estándar de Física de Partículas (ME) ha tenido una historia exitosa en las pasadas décadas. +Con el descubrimiento del quark ``cima'', el neutrino $\tau$ y el bosón de Higgs, el ME ha mostrado su efectividad y su poder predictivo. 
+Sin embargo, deja atrás preguntas abiertas con respecto a problemas como la Materia Oscura y, por lo tanto, existe una necesidad de nueva física. +Esto ha llevado a muchas búsquedas exóticas con la esperanza de responder las preguntas que el Modelo Estándar aún nos deja. +Para proporcionar la calidad necesaria para buscar nueva física, los físicos utilizan las máquinas más complejas que han sido diseñadas. La preponderancia de evidencia cosmológica sugiere que la densidad de energía de la materia oscura en el universo es aproximadamente 5 veces la densidad de materia bariónica regular. Por lo tanto, se han desarrollado búsquedas experimentales para explicar esto. +La Colaboración CMS ha buscado señales de un modelo de materia oscura a través del grupo de análisis de Jets Emergentes. Como en todos los experimentos en física de altas energías, adquirir datos de alta calidad es primordial para lograr ciencia innovadora. +El experimento CMS logra la recopilación de sus datos de alta calidad a través de los sistemas de ``trigger'' y adquisición de datos implementados, pero requiere mucho trabajo manual para certificarlos. En este escrito, presento estudios de eficiencia de ``trigger'' relevantes para el análisis de Jets Emergentes. Además, presento mi trabajo para mejorar el proceso de la certificación de datos en el proceso de DQM implementado en el grupo de DQM del Tracker de CMS. Este trabajo añade la automatización de una nueva aplicación web llamada el ``Machine Learning Playground'', diseñada para mejorar la eficiencia de los trabajadores de turno de DQM en la certificación de datos. diff --git a/chapters/Acknowledgements.tex b/chapters/Acknowledgements.tex index 01221f6..6f12c64 100644 --- a/chapters/Acknowledgements.tex +++ b/chapters/Acknowledgements.tex @@ -1,4 +1,4 @@ -\chapter{Acknowledgments} +\chapter*{Acknowledgments} This work would not have been possible without the support of my family, colleagues, and friends. 
Firstly, I wish to thank my parents, family, and my partner Yarelis for their unwavering support. A special thanks to my advisor, Professor Sudhir Malik, who was fundamental to my academic growth, providing me with guidance and opportunities to shape me into what I am today. Thanks to Dr.~Scarlet Norberg (UPRM Post-Doc) for her continued support, guidance, and patience throughout this work. Scarlet also provided invaluable guidance even during my undergrad years at UPRM. diff --git a/chapters/Chapter02.tex b/chapters/Chapter02.tex index 35ff193..a53750d 100644 --- a/chapters/Chapter02.tex +++ b/chapters/Chapter02.tex @@ -13,12 +13,13 @@ \chapter{The CMS Detector\label{ch:CMS}} \begin{figure} \centering \includegraphics[width=\linewidth]{CMSLayout.png} - \caption{The CMS Detector \label{CMSLayout}} + \caption[CMS Detector]{The CMS Detector. Reprinted from \cite{CMS_detector}.} + \label{CMSLayout} \end{figure} \begin{figure} \centering \includegraphics[width=.8\linewidth]{Images/CMS Coordinate.png} - \caption{The CMS coordinate system} + \caption[The CMS coordinate system]{The CMS coordinate system. Reprinted from \cite{izaakneutelings2024}.} \label{fig:CMSCoord} \end{figure} The detector has an onion-like structure to capture all the particles that are produced in high-energy collisions. @@ -33,7 +34,8 @@ \chapter{The CMS Detector\label{ch:CMS}} \begin{figure}[h] \centering \includegraphics[width=.9\linewidth]{CMSLayers.png} - \caption[Particle trajectories and footprint in CMS]{The trajectory of a particle traveling through the layers of the detector leaving behind it's signature footprint\label{CMSLayers}} + \caption[Particle trajectories and footprint in CMS]{The trajectory of a particle traveling through the layers of the detector leaving behind its signature footprint. 
Reprinted from \cite{How_CMS_detecs}} + \label{CMSLayers} \end{figure} diff --git a/chapters/Chapter03.tex b/chapters/Chapter03.tex index 869919a..e1d61ae 100644 --- a/chapters/Chapter03.tex +++ b/chapters/Chapter03.tex @@ -1,13 +1,16 @@ \chapter{Emerging Jets (EJs) \label{ch:emj}} +The Emerging Jets analysis was a multi-institution effort with physicists from the University of Puerto Rico-Mayagüez, the University of Colorado Boulder, the University of Maryland, Panjab University, and Fermilab, and took over two years to complete. Within this analysis team, I worked on trigger studies and determined the scale factors that adjust for any inadequate modeling of Monte Carlo signal simulation data due to trigger turn-on effects. This analysis is now public on \cite{CMS:2024gxp} and has been sent for publication in the Journal of High Energy Physics (JHEP). \section{Background information on EJs} -Although there is a preponderance of evidence from astronomical and cosmological observations for the existence of dark matter \cite{What_is_DM}, it has not yet been detected in laboratories, suggesting that its origin may be associated with as-of-yet unobserved physics processes beyond the Standard Model. As experimental searches have excluded a large portion of the phase space of DM models with weakly interacting massive particles \cite{WIMPS}, alternative theoretical models have been developed with a hidden gauge sector, similar to quantum chromodynamics (QCD), which can result in strongly self-interacting DM particles. Dark matter of his type could interact with SM particles through so-called mediator particles and potentially be produced at colliders, producing signatures such as semivisible jets \cite{Nabili:2886140} or emerging jets\cite{sirunyan2019search}. 
+Although there is a preponderance of evidence from astronomical and cosmological observations for the existence of dark matter \cite{What_is_DM}, it has not yet been detected in laboratories, suggesting that its origin may be associated with as-of-yet unobserved physics processes beyond the Standard Model. +As experimental searches have excluded a large portion of the phase space of DM models with weakly interacting massive particles \cite{WIMPS}, alternative theoretical models have been developed with a hidden gauge sector, similar to quantum chromodynamics (QCD), which can result in strongly self-interacting DM particles. Dark matter of this type could interact with SM particles through so-called mediator particles and potentially be produced at colliders, producing signatures such as semivisible jets \cite{Nabili:2886140} or emerging jets \cite{sirunyan2019search}. -The emerging jets concept is described here in \cite{Schwaller:2015gea}. This was used to look for EJs in the Run 1 dataset of the CMS Experiment. -However, our current Run-2 EJ analysis surpasses the Run-1 search for EJs in the unflavored scenario, increasing the experimental limit of the dark mediator particle by 500 GeV to set the most stringent limits to date, and provides the first direct exclusion of the flavor-aligned scenario. -The EJs model is a dark matter model that assumes that there is a QCD-like hidden sector. In particular, in these high-energy collisions, a heavy dark mediator ($X_{DK}$) is produced with a mass on the $\order{\text{TeV}}$, decaying into dark hadrons and mesons that further decay into SM particles. Due to the hierarchy of GeV to TeV energy scales (see \Cref{fig:dark-qcdmodel}), the decay process allows for dark matter particles to travel a measurable distance before decaying. +The EJ concept is described in \cite{Schwaller:2015gea}. 
This was used to look for EJs in the Run 1 dataset of the CMS Experiment where a particular model, referred to as the ``unflavored'' model, is studied \cite{sirunyan2019search}. The unflavored model of EJs is mainly concerned with the creation of a dark mediator (\Mdark) that couples to only down quarks. +However, our current Run-2 EJ analysis, which surpasses the Run-1 search for EJs in the unflavored scenario, increasing the experimental limit of the dark mediator particle by 500 GeV to set the most stringent limits to date, provides the first direct exclusion of the flavor-aligned scenario. +The flavor-aligned model describes a scenario where the \Mdark couples to all down-type flavor quarks ($d, s, b$). +The EJs model is a dark matter model that assumes that there is a QCD-like hidden sector. In particular, in these high-energy collisions, \Mdark is produced with a mass of $\order{\text{TeV}}$, decaying into dark hadrons and mesons that further decay into SM particles. Due to the hierarchy of GeV to TeV energy scales (see \Cref{fig:dark-qcdmodel}), the decay process allows for dark matter particles to travel a measurable distance before decaying. In \Cref{fig:emj_production1} we see the production processes of the EJs signature. There are 2 ways of producing EJs in the LHC. First, is through gluon-gluon fusion and second is from quark anti-quark annihilation. Both of these produce a pair of heavy dark mediators, each then decays into an SM quark (\textit{q}) and a dark quark (\Qdark). Further on, we see from \Cref{fig:full-chain} that the \Qdark will decay into \pidark. Since these dark pions are unstable and do not carry a dark baryon number, they then decay after some measurable distance into SM particles \cite{Bai_2014} and form SM jets that we can detect. 
@@ -32,14 +35,14 @@ \section{Background information on EJs} \caption{quark anti-quark annihilation} \end{subfigure} \end{center} - \caption[Emergin jets production modes]{Feynman diagrams for pair production of dark mediator particles, with mediators decay to an SM quark and a dark quark. The bar ($-$) over the quark symbols signify that they are anti-particles, as is the dagger ($\dagger$) over the \Mdark.} + \caption[Emerging jets production modes]{Feynman diagrams for pair production of dark mediator particles, with each mediator decaying to an SM quark and a dark quark. The bar ($-$) over the quark symbols signifies that they are anti-particles, as does the dagger ($\dagger$) over the \Mdark. Reprinted from \cite{CMS:2024gxp}.} \label{fig:emj_production1} \end{figure} \begin{figure} \centering \includegraphics[width=.8\linewidth]{Images/EMJ_production.png} - \caption{Example of the full chain of one production mode.} + \caption[Example of the full chain of one production mode.]{Example of the full chain of one production mode. Reprinted from internal communications.} \label{fig:full-chain} \end{figure} @@ -54,9 +57,11 @@ \section{Background information on EJs} \includegraphics*[width=\textwidth]{pdfs/UnflavoredSchematicOfEvent.pdf} \caption{Unflavored} \end{subfigure} - \caption[Shorter version of the production modes for the Emerging Jets models.]{Shorter version of the production modes for the Emerging Jets models. On the left, we show the flavor-aligned model where all \Qdark couple to down-type SM quarks only (d,s,b). - This model has \pidark variable lifetimes (\ctaudpi ) which depend on their composition and the Yukawa coupling constant ($\kappa$) between the mediator particle, the dark quarks, and the SM down quark. This parameter represents the lifetime of each track inside emerging jets. - On the right, we show the simpler unflavored model. 
This produces \Qdark that couple to the down-quark only and all \pidark lifetimes are the same.} + \caption[Shorter version of the production modes for the Emerging Jets models.]{Shorter version of the production modes for the Emerging Jets models. On the left, we show the flavor-aligned model where all \Qdark couple to down-type SM quarks only ($d, s, b$). + This model gives \pidark variable lifetimes (\ctaudpi ) which depend on their composition and the Yukawa coupling constant ($\kappa$) between the mediator particle, the dark quarks, and the SM down quark. + This parameter represents the lifetime of each track inside emerging jets. On the right, we show the simpler unflavored model. + This produces \Qdark that couple to the down-quark only and all \pidark lifetimes are the same. + Reprinted from internal communications.} \label{fig:emj_production2} \end{figure} @@ -75,16 +80,16 @@ \section{Background information on EJs} \end{equation} where $f_{\pidark}$ is the dark pion decay constant, $m_\text{d}$ is the mass of the SM down quark, and $m_{\pidark}$ is the dark pion mass. In the flavored aligned model, the coupling constant is now a matrix $\kappa_{\alpha i}$ where the subscript $\alpha ~(i)$ denotes flavors of dark (SM) quarks. In this case, the average decay length for dark mesons is given by \Cref{eq:flavored-ctau} \begin{equation} - \small + % \small \ctaudpi^{\alpha \beta} = \dfrac{8\pi m^4_{\Mdark}}{ N_c m_{\pidark} f^2_{\pidark} \displaystyle \sum_{i,j} \abs{\kappa_{\alpha i} \kappa_{\beta j}^*}^2 \pgroup{m_i^2 + m_j^2} \sqrt{ \pgroup{1- \dfrac{(m_i^2 + m_j^2)^2 }{m^2_{\pidark}} } \pgroup{1- \dfrac{(m_i^2 - m_j^2)^2 }{m^2_{\pidark}} } } } \label{eq:flavored-ctau} \end{equation} where $m_{\Mdark}$ is the mediator mass, $N_c$ is the SM color factor and $m_i, m_j$ are the masses of the SM quarks with flavor indices $i, j$, respectively\cite{CMS:2024gxp}. 
\Cref{fig:lifetimes} shows the different $c\tau$ for a given $m_{\pidark}$ based on the \pidark composition in the flavor-aligned model. In general, the lifetime of the dark pions goes down as their mass increases, as opposed to the unflavored model where the lifetimes are the same for all \pidark. -\begin{figure}[b] +\begin{figure}[h] \centering \includegraphics[width=.65\linewidth]{Images/pdfs/FlavoredLifetime.pdf} - \caption[Lifetimes of the dark pions as a function of their mass.]{Lifetime of the \pidark as a function of the $m_{\pidark}$ in the flavor-aligned model. The jumps in the plot are indications of new energy states becoming available.} + \caption[Lifetimes of the dark pions as a function of their mass.]{Lifetime of the \pidark as a function of the $m_{\pidark}$ in the flavor-aligned model. The jumps in the plot are indications of new energy states becoming available. Reprinted from \cite{CMS:2024gxp}.} \label{fig:lifetimes} \end{figure} @@ -115,7 +120,10 @@ \section{Background information on EJs} \label{fig:2emj_inCMS} \end{figure} -Unfortunately, despite the increased amount of data, and the introduction of machine learning techniques to search for EJs, the results of this analysis found no evidence of the EJs signature and excluded mediator masses up to 1950 (1850) GeV for an unflavored (flavor-aligned) dark QCD model within the upper limit of 95\% statistical confidence level \cite{CMS:2024gxp}. +Unfortunately, despite the increased amount of data, and the introduction of machine learning techniques to search for EJs, the results of this analysis found no significant deviations from SM predictions. +The analysis thus excludes a region of the parameter space from being considered in future searches for EJs. More specifically, for the unflavored model, $m_{X_{DK}} <$ 1950 GeV are excluded for $c\tau_{\pi_{DK}} \approx$ 100mm and $m_{\pi_{DK}}=$~10~GeV. 
In the flavor-aligned scenario we exclude $m_{X_{DK}} <$ 1850~GeV at $c\tau^{max}_{\pi_{DK}} \approx$ 500~mm for $m_{\pi_{DK}}=$~10~GeV. +This result surpasses the previous search for emerging jets in the unflavored scenario, and increases the limit of the dark mediator particle by $\approx$ 500~GeV \cite{CMS:2024gxp}. + \clearpage @@ -164,7 +172,7 @@ \section{Trigger Efficiency and Scale Factor studies} The fit result is used to determine the threshold at which the $H_T$ trigger is expected to reach 99\% of their plateau value. This is also to assist in the termination of the offline $H_T$ cut applied to signal event selection, to make sure that signal events are not impacted too much by the trigger turn-on effects. \Cref{fig:HT_efficiencies} shows the trigger efficiency as a function of event $H_T$ evaluated in the 4 data collection eras using the \textit{JetHT} data stream compared with QCD simulation along with an estimate of the trigger plateau value. More specifically, \Cref{fig:HT_eff_16,fig:HT_eff_16_HIPM,fig:HT_eff_17,fig:HT_eff_18} compare efficiency for \HT trigger as a function of event \HT measured relative to \verb|HLT_Mu50_v*| in data (black) and QCD MC (gray) and fit the algebraic function \textit{f} (line). With the computation of the efficiency at each range of \HT, we can compute the ratio between the \HT in data and MC. The ratio of the trigger efficiency in data vs. that in QCD MC is applied to each signal MC event as an $H_T$-dependent scaling factor, and the difference in the event acceptance of applying the scale factor and applying the scale factors with a shifted statistical uncertainty is treated as its systematic uncertainty. 
-\begin{figure} +\begin{figure}[h] \centering \begin{subfigure}{.45\textwidth} \includegraphics[width=\linewidth]{Images/pdfs/16_efficiency_withratio_and_fits.pdf} @@ -196,7 +204,7 @@ \section{Trigger Efficiency and Scale Factor studies} The scale factor values used for signal MC can be found in \Cref{tab:2016_triggerSF,tab:2016HIPM_triggerSF,tab:2017_triggerSF,tab:2018_triggerSF}. The uncertainties in the table are just the statistical uncertainties of data and MC selection efficiency propagated appropriately. -\begin{table} +\begin{table}[b] \centering \caption{Scale factors (SF) and statistical uncertainties of the \HT trigger for 2016.} \label{tab:2016_triggerSF} diff --git a/chapters/Chapter05.tex b/chapters/Chapter05.tex index ea42eb5..cbe566a 100644 --- a/chapters/Chapter05.tex +++ b/chapters/Chapter05.tex @@ -14,7 +14,7 @@ \section{DQM Workflows} \begin{figure} \centering \includegraphics[width=.89\linewidth]{Images/DQM Workflow.png} - \caption{The DQM workflow} + \caption[The DQM workflow]{The DQM workflow. Reprinted from \cite{DQM_workflow}} \label{fig:DQM_workflow} \end{figure} The DQM workflow consists of 2 types: Online and Offline. @@ -29,7 +29,7 @@ \section{DQM Tools} \begin{figure} \centering \includegraphics[width=.75\linewidth]{Images/certhelper-menu.png} - \caption{\textit{Certification Helper} is a web app that allows shifters to view, certify, and gather information on a given run.} + \caption[\textit{Certification Helper} webapp.]{Screenshot of the \textit{Certification Helper}. 
The \textit{Certification Helper} is a web app that allows shifters to view, certify, and gather information on a given run.} \label{fig:certhelper} \end{figure} @@ -43,13 +43,13 @@ \section{DQM Tools} \begin{subfigure}{\linewidth} \includegraphics[width=1\linewidth]{Images/certhelper-list.png} \end{subfigure} - \caption{The \textit{Certification Helper} portal that allows shifters to select and view which runs are available to certify.} + \caption[The \textit{Certification Helper} portal]{The \textit{Certification Helper} portal that allows shifters to select and view which runs are available to certify. Reprinted from \cite{CertHelper}} \label{fig:certhelper-portal} \end{figure} \begin{figure} \centering \includegraphics[width=1\linewidth]{Images/certhelp-cert.png} - \caption{The view when certifying a run on \textit{Certification Helper}} + \caption[The view when certifying a run on \textit{Certification Helper}]{The view when certifying a run on \textit{Certification Helper}. Reprinted from \cite{CertHelper}} \label{fig:certhelper-cert} \end{figure} @@ -59,20 +59,20 @@ \section{DQM Tools} \begin{figure} \includegraphics*[width=\linewidth,trim= 0 7in 1in 0 ]{Images/DQM GUI.png} - \caption{The DQM GUI shows many histograms that shifters use to determine the quality of a run.} + \caption{A screenshot of the DQM GUI presenting a number of histograms that shifters use to determine the quality of a run.} \label{fig:dqmgui} \end{figure} \begin{figure} \includegraphics*[width=\linewidth,trim= 2.9in 4.4in 0 0in]{Images/RR.png} - \caption{Run Registry. This page is a database that shows the datasets where each run is classified to and also shows it's DQM certification.} + \caption{A screenshot of the Run Registry webpage. 
This page is a database that shows the datasets where each run is classified to and also shows its DQM certification.} \label{fig:RR} \end{figure} \begin{figure} \includegraphics*[width=1\linewidth,trim = .8in 1.1in .9in 2.19in]{Images/OMS.png} - \caption{OMS webpage. This shows detector and data taking conditions and statistics} + \caption{A screenshot of the OMS webpage. This shows detector and data taking conditions and statistics.} \label{fig:OMS} \end{figure} @@ -81,15 +81,15 @@ \section{Challenges of DQM} The current DQM process presents many challenges that need to be addressed: \begin{itemize} - \item The process ultimately depends on the decisions made by during DQM shifts. As time passes shifters must be trained regularly to learn the DQM process and each particular subsystem's specific metrics. The dependence of human shifters leaves the process vulnerable to unforeseen outside influences (such as getting sick, a pandemic, lack of worker availability, etc.). This allows for unpredictable mistakes and biases in the monitoring and certification workflows. + \item The process ultimately depends on the decisions made during DQM shifts. As time passes shifters must be trained regularly to learn the DQM process and each particular subsystem's specific metrics. The dependence on human shifters leaves the process vulnerable to unforeseen outside influences (such as getting sick, a pandemic, lack of worker availability, etc.). This allows for unpredictable mistakes and biases in the monitoring and certification workflows. - \item Worsening this, the amount of histograms to be checked is on the order of 50-100 and many have unique metrics to define what is considered nominal. People can make mistakes and miss errors even with a dedicated team of experts for each subsystem to guide the shifters. 
+ \item Worsening this, the number of histograms to be checked is on the order of 50--100 per hour and many have unique metrics to define what is considered nominal. 
People can make mistakes and miss errors even with a dedicated team of experts for each subsystem to guide the shifters. \item The detector is subject to transient problems that can be overlooked during visual inspection of the monitoring elements. \cite{ML4DQM} \item Detector conditions can change drastically enough to require a change in the selection of the reference material. Shift experts also determine this. - \item A lot of documentation can be found to learn about the DQM procedure. But again, this needs to be kept up-to-date manually. Sometimes this can be outdated information affecting shifter decisions. + \item A lot of documentation can be found to learn about the DQM procedure. But again, this needs to be kept up-to-date manually. Sometimes these can be outdated, affecting shifter decisions. \item The current workflow certifies data on a run-by-run basis (i.e. run granularity). This is especially relevant with the upcoming HL-LHC, where detector conditions will allow for more data to be collected per unit of time. \end{itemize} @@ -112,7 +112,7 @@ \section{Reference Run Ranking (non-ML)} \begin{figure} \centering \includegraphics[width=\linewidth]{Images/ranking.png} - \caption{Reference run ranking system demo.} + \caption{A screenshot of a demo of the reference run ranking system.} \label{fig:ranking} \end{figure} @@ -123,14 +123,12 @@ \section{ML Playground}\label{sect:MLP} In this project, I developed code to automate the data ingestion of the MLP by using a cronjob. This cronjob executes a query from another database called \textit{Data Aggregation System} (DAS), to gather lists of newly generated files continuously. The script later downloads and copies files to our CERN-based filesystem called EOS. Afterward, the script will index the newly copied files to the MLP database and execute the MLP's parsing capabilities, allowing the MLP to read and portray the information contained inside the files. 
I have added logging functionality for detailed bookkeeping in case the scripts involved fail. +There are two future tasks: First, implement robust checks of the files already present in the EOS space and attempt to copy over only newly added files to the list. +Secondly, implement a method that allows for files that are already found in the EOS to be forcibly updated or overwritten at the request of a user, if needed. \begin{figure} \centering \includegraphics*[width=\linewidth,trim = 1cm 5.2in 13.6in 0]{Images/MLP.png} - \caption{ML playground web app} + \caption{A screenshot of the ML Playground web app} \label{fig:MLplayground} \end{figure} - - -There are two future tasks: First, implement robust checks of the files already present in the EOS space and attempt to copy over only newly added files to the list. -Secondly, implement a method that allows for files that are already found in the EOS to be forcibly updated or overwritten at the request of a user, if needed. diff --git a/chapters/Conclusion.tex b/chapters/Conclusion.tex index 792881b..8bb4304 100644 --- a/chapters/Conclusion.tex +++ b/chapters/Conclusion.tex @@ -1,6 +1,18 @@ \chapter{Conclusions}\label{ch:conclusion} -The work described here shows the study of trigger efficiencies used in the Emerging Jets analysis and how it is an integral part of recording the appropriate data with high quality and sufficient statistics. The efficiency curves were used to determine the energy thresholds that the analysis would use to mitigate the turn-on effects of the triggers. Any uncertainties or deviations were properly studied and applied to simulation data to ensure that the signal was well modeled. This analysis is now public on \cite{CMS:2024gxp} and has been sent for publication in the Journal for High-Energy Physics (JHEP). The search looked for pair production of the scalar mediator at the LHC, which yields events with two SM jets and two emerging jets at leading order. 
The results are interpreted using two dark sector models with different flavor structures, and exclude mediator masses up to 1950 (1850) GeV for an unflavored (flavor-aligned) dark QCD model. The unflavored results surpass a previous search for emerging jets by setting the most stringent mediator mass exclusion limits to date, while the flavor-aligned results provide the first direct mediator mass exclusion limits to date. +The EJs search looked for pair production of the scalar mediator at the LHC, which yields events with two SM jets and two EJs at leading order. +The results are interpreted using two dark sector models with different flavor structures, referred to as ``flavor-aligned'' and ``unflavored''. +For the unflavored model, mediator masses $m_{X_{DK}} <$ 1950~GeV are excluded for $c\tau_{\pi_{DK}} \approx$ 100~mm and $m_{\pi_{DK}}=$~10~GeV. In the flavor-aligned scenario we exclude $m_{X_{DK}} <$ 1850~GeV at $c\tau^{max}_{\pi_{DK}} \approx$ 500~mm for $m_{\pi_{DK}}=$~10~GeV. +The unflavored results surpass a previous search for emerging jets by setting the most stringent mediator mass exclusion limits to date, while the flavor-aligned results provide the first direct mediator mass exclusion limits. +% This result surpasses the previous search for emerging jets in the unflavored scenario, and increases the limit of the dark mediator particle by $\approx$ 500GeV +The work described here shows my contributions to the EJs analysis in the study of trigger efficiencies and how it is an integral part of recording the appropriate data with high quality and sufficient statistics. +The efficiency curves were used to determine the energy thresholds that the analysis would use to mitigate the turn-on effects of the triggers. +Any uncertainties or deviations were properly studied and applied to simulation data to ensure that the signal was well modeled. 
+This analysis is now public on \cite{CMS:2024gxp} and has been sent for publication in the Journal of High Energy Physics (JHEP). 
-My work also contributed to the very first design of tools that enable a machine learning-based DQM process for the CMS Tracker to meet challenges at the HL-LHC. A Run ranking system to grade reference runs was developed and tested on the certification data from CMS. This system would assist DQM shift personnel in the process of selecting new reference runs and possibly begin the automation of this procedure. Finally, my work on the development and automation for the data ingestion of the new MLP would be a first step in building a robust data exploration tool, promoting the mission of an ML-based data certification workflow for efficient DQM. + +My work also contributed to the very first design of tools that strive to enable a machine learning-based DQM process for the CMS Tracker to meet challenges at the HL-LHC. +A Run ranking system to grade reference runs was developed and tested on the certification data from CMS. +This system would assist DQM shift personnel in the process of selecting new reference runs and possibly begin the automation of this procedure. +Finally, my work on the development and automation for the data ingestion of the new MLP is a first step in building a robust data exploration tool, promoting the mission of an ML-based data certification workflow for efficient DQM. diff --git a/chapters/Introduction.tex b/chapters/Introduction.tex index 649b58d..ded5906 100644 --- a/chapters/Introduction.tex +++ b/chapters/Introduction.tex @@ -1,15 +1,24 @@ \chapter{Introduction} -The Standard Model (SM) of particle physics~\cite{What_is_SM} has been very successful in explaining many parts of the universe as it exists now. However, there are several unanswered questions for which SM is insufficient, for example, the nature of Dark Matter (DM) and Dark Energy, matter-antimatter asymmetry, unique mass of Higgs Boson, neutrino mass problem, and many more. 
The Large Hadron Collider (LHC) \cite{What_is_LHC} at CERN\cite{What_is_CERN} is the most powerful accelerator in the world with the ability to look into what is called Beyond the Standard Model Physics (BSM) and might help answer these questions. It will soon be upgraded to what is called the High-Luminosity LHC (HL-LHC)~\cite{hl-lhc} and will provide orders of magnitudes more collisions and data, along with new challenges. There are several experimental apparatus that populate the 27~\unit{km} circumference of the LHC that are designed to take, process, and analyze data from the proton-proton collisions at the center-of-mass energy of up to 14~\unit{TeV}. -One of these experiments is called the Compact Muon Solenoid (CMS) \cite{What_is_CMS,CMS_detector} and is one of the most sophisticated instruments built by humans. The CMS experimental collaboration is comprised of over 200 universities and institutes from over 50 countries around the globe. In the United States, there are about 50 institutions in CMS experiment where Fermilab is the host institution for these. The LHC Physics Center (LPC) at Fermilab provides its users with state-of-the-art computing facilities, a remote CMS operations center, and physics and detector expertise. The data collected from the CMS experiment is used for the work of this thesis. The LPC at Fermilab provided the facilities and tools to carry out the work described. +The Standard Model (SM) of particle physics~\cite{What_is_SM} has been very successful in explaining many parts of the universe as it exists now. However, there are several unanswered questions for which SM is insufficient, for example, the nature of Dark Matter (DM) and Dark Energy, matter-antimatter asymmetry, unique mass of Higgs Boson, neutrino mass problem, and many more. 
+The Large Hadron Collider (LHC)~\cite{What_is_LHC} at CERN~\cite{What_is_CERN} is the most powerful accelerator in the world with the ability to look into what is called Beyond the Standard Model Physics (BSM) and might help answer these questions. It will soon be upgraded to what is called the High-Luminosity LHC (HL-LHC)~\cite{hl-lhc} and will provide orders of magnitude more collisions and data, along with new challenges. +There are several experimental apparatus that populate the 27~\unit{km} circumference of the LHC that are designed to take, process, and analyze data from the proton-proton collisions at the center-of-mass energy of up to 14~\unit{TeV}. +One of these experiments is called the Compact Muon Solenoid (CMS)~\cite{CMS_detector} and is one of the most sophisticated instruments built. The CMS experimental collaboration is comprised of over 200 universities and institutes from over 50 countries around the globe. In the United States, there are about 50 institutions in the CMS experiment where Fermilab is the host institution for these. The LHC Physics Center (LPC) at Fermilab provides its users with state-of-the-art computing facilities, a remote CMS operations center, and physics and detector expertise. +The data collected from the CMS experiment is used for the work of this thesis. The LPC at Fermilab provided the facilities and tools to carry out the work described. + +\begin{figure} + \includegraphics*[width = \linewidth]{Images/Satellite-view-of-LHC-tunnel-with-the-four-detectors.png} + \caption[Satellite view of the LHC]{Satellite view of LHC tunnel with the four detectors. Reprinted from~\cite{phdthesis}.} + \label{fig:Satellite} +\end{figure} CERN was founded in 1954 and is located at the Franco-Swiss border near Geneva. At CERN, physicists and engineers are probing the fundamental structure of the universe. They use the world's largest and most complex scientific instruments to study the basic constituents of matter --- the fundamental particles. 
-The instruments used at CERN are purpose-built particle accelerators (LHC) and detectors like CMS, ATLAS \cite{What_is_ATLAS}, LHCb \cite{What_is_LHCb}, and ALICE \cite{What_is_ALICE}. Accelerators boost beams of particles to high energies before the beams are made to collide with each other or with stationary targets. Specifically, at the LHC proton-proton beams are accelerated to up to 14~TeV and collide at few points around the LHC tunnel, like Point 5 (P5) which is the location of CMS experiment. +The instruments used at CERN are purpose-built particle accelerators (LHC) and detectors like CMS, ATLAS \cite{What_is_ATLAS}, LHCb \cite{What_is_LHCb}, and ALICE \cite{What_is_ALICE}. Accelerators boost beams of particles to high energies before the beams are made to collide with each other or with stationary targets. Specifically, at the LHC proton-proton beams are accelerated to up to 14~TeV and collide at a few points around the LHC tunnel, such as Point 5 (P5), which is the location of the CMS experiment, as seen in \Cref{fig:Satellite}. Detectors observe and record the results of these collisions. The LHC accelerates the protons almost up to the speed of light. The proton collisions give physicists clues about the state of the universe just after the Big Bang and provide insights into the fundamental laws of nature. There are nine\footnote{\url{https://home.cern/science/experiments}} experiments at the LHC that analyze particles produced by proton collisions. -The biggest of these experiments, ATLAS and CMS, are general-purpose detectors designed to study the -fundamental nature of matter and fundamental forces and to look for new physics or evidence of particles that are beyond the Standard Model. Having two independently designed detectors is vital for cross-confirmation of any discoveries. 
The other two major experiments, ALICE and LHCb, respectively, study a state of matter (Quark-Gluon Plasma) that was present just moments after the Big Bang with a preponderance of matter over antimatter. Each experiment does important research that is key to understanding the universe that surrounds and makes us. +The biggest of these experiments, ATLAS and CMS, are general-purpose detectors designed to study the fundamental nature of matter and fundamental forces and to look for new physics or evidence of particles that are beyond the Standard Model. +Having two independently designed detectors is vital for cross-confirmation of any discoveries. The other two major experiments, ALICE and LHCb, study, respectively, a state of matter (Quark-Gluon Plasma) that was present just moments after the Big Bang and the preponderance of matter over antimatter. Each experiment does important research that is key to understanding the universe that surrounds and makes us. This thesis work focuses on studies performed towards a search for ``Emerging Jets'' (EJs) produced in proton-proton collisions at a center-of-mass energy of 13 TeV. The data collected by the CMS experiment corresponds to an integrated luminosity of 138 \unit{fb^{-1}}. This search examines a hypothetical dark quantum chromodynamics (QCD) sector that couples to the SM matter through a scalar mediator. The scalar mediator decays into an SM quark and a DM quark. As the DM quark showers and hadronizes, it produces long-lived dark mesons that subsequently decay into SM particles, resulting in a jet, known as an emerging jet, with multiple displaced vertices. We looked for pair production of the scalar mediator which yields events with two SM jets and two emerging jets at leading order. This analysis \cite{CMS:2024gxp} excluded mediator masses up to 1950 (1850) GeV for an unflavored (flavor-aligned) dark QCD model. 
The unflavored results surpass a previous search for emerging jets \cite{sirunyan2019search} by setting the most stringent mediator mass exclusion limits to date, while the flavor-aligned results provide the first direct mediator mass exclusion limits to date. This was a multi-institution effort with physicists from the University of Puerto Rico-Mayagüez, Colorado University-Boulder, University of Maryland, Panjab University, and Fermilab, and took over two years to complete. Within this analysis team, I worked on trigger studies and determined the scale factors that adjust for any inadequate modeling of Monte Carlo signal simulation data due to trigger turn-on effects. This analysis is now public on \cite{CMS:2024gxp} and has been sent for publication in the Journal for High-Energy Physics (JHEP). @@ -27,7 +36,6 @@ \chapter{Introduction} The thesis work has been presented at conferences. \cite{user-meeting2023,prism2022,DPF,prism2019} - My research experience also led me to contribute to software trainings in HEP, broader impacts and outreach. 
These are described in the \hyperlink{appendix}{Appendix} diff --git a/main.tex b/main.tex index cb8d873..e43c8c6 100644 --- a/main.tex +++ b/main.tex @@ -1,20 +1,21 @@ % !TEX root = main.tex %Specify document class -\documentclass[12pt,twoside,openany]{book} +\documentclass[11pt,twoside,openany]{book} \input{preamble.tex} \begin{document} -\input{Title.tex} - \frontmatter + +\input{Title.tex} \doublespace \include{chapters/Abstract} \include{chapters/Acknowledgements} \singlespace \tableofcontents +\listoftables \listoffigures \clearpage \doublespace diff --git a/preamble.tex b/preamble.tex index 589d7ab..64bf6da 100644 --- a/preamble.tex +++ b/preamble.tex @@ -39,11 +39,11 @@ chaptertitle=false,pageranges=false, backref=false,defernumbers ]{biblatex} -%\bibliography{references.bib} -\addbibresource{references.bib} +%\bibliography{references.bib} % for bibtex +\addbibresource{references.bib} % biblatex % \DeclareFieldFormat[report]{title}{\printtext[doi/url-link]{\mkbibemph{#1}}} \usepackage[nottoc,numbib]{tocbibind} -% \settocbibname{References} +% \settocbibname{References} % Bibliography is default \usepackage[table,xcdraw]{xcolor} \usepackage{siunitx} @@ -72,7 +72,7 @@ % To have the transparent command -\usepackage{svg} +% \usepackage{svg} \setlist[itemize]{topsep=\parskip} diff --git a/references.bib b/references.bib index 3f6eb3b..879b5b2 100644 --- a/references.bib +++ b/references.bib @@ -1,4 +1,39 @@ - +@phdthesis{phdthesis, +author = {Cavicchioli, Costanza and Masotti, L. 
and Biagi, Elena and Bozzini, Davide}, +year = {2007}, +month = {07}, +pages = {}, +keywords = {content, image}, +title = {Fault detection on the Large Hadron Collider at CERN: design, simulation and realization of a High Voltage Pulse Generator} +} + +@online{izaakneutelings2024, + author = {Izaak Neutelings}, + title = {{CMS} coordinate system}, + month = {05}, + year = {2024}, + url = {https://tikz.net/axis3d_cms/}, + keywords = {content, image} +} + +@online{CertHelper, + title = {User manual — Certification Helper documentation}, + url = {https://certifier.readthedocs.io/en/latest/user-manual.html}, + urldate = {2024-05-04}, + keywords = {content, image}, +} + +@article{DQM_workflow, +author = {Rovere, M}, +year = {2015}, +month = {12}, +pages = {072039}, +title = {{The Data Quality Monitoring Software for the CMS experiment at the LHC}}, +volume = {664}, +journal = {Journal of Physics: Conference Series}, +doi = {10.1088/1742-6596/664/7/072039}, +keywords={content, image} +} @article{CMS:2008xjf, author = {{CMS Collaboration}}, @@ -330,22 +365,22 @@ @online{What_is_CERN date-added = {2024-03-26 14:18:04 -0400}, date-modified = {2024-03-26 18:01:22 -0400}, keywords = {content}, - organization = {{CMS}}, + organization = {{CERN}}, title = {About {CERN}}, url = {https://home.cern/about}, year = {2023}, bdsk-url-1 = {https://home.cern/about}} -@online{What_is_CMS, +@online{How_CMS_detecs, author = {CERN}, date-added = {2024-03-26 14:18:04 -0400}, date-modified = {2024-03-26 18:01:22 -0400}, keywords = {content}, organization = {CMS}, - title = {About {CMS}}, - url = {http://cms.web.cern.ch/news/what-cms}, + title = {How {CMS} detects particles}, + url = {https://cms.cern/news/how-cms-detects-particles}, year = {2023}, - bdsk-url-1 = {http://cms.web.cern.ch/news/what-cms}} + } @online{What_is_DM, author = {CERN}, @@ -502,7 +537,8 @@ @electronic{physcon howpublished = {Physcon 2019}, keywords = {presentation,th}, month = {11}, - title = {{Machine Learning in DQM at 
CMS Experiment}}, + title = {{Data Quality Monitoring using Machine Learning +for CMS Experiment at CERN}}, url = {https://guillermofidalgo.github.io/assets/pdfs/Physcon_Poster.pdf}, year = {2019}, bdsk-url-1 = {https://guillermofidalgo.github.io/assets/pdfs/Physcon_Poster.pdf}} @@ -513,7 +549,7 @@ @electronic{prism2019 date-modified = {2024-03-26 18:00:33 -0400}, keywords = {presentation,th}, organization = {PRLSAMP2019, Mayag{\"u}ez, PR}, - title = {{Using Machine Learning for DQM at CMS}}, + title = {{Using Machine Learning Techniques for Data Quality Monitoring at CMS Experiment}}, url = {https://guillermofidalgo.github.io/assets/pdfs/ML4DQM_PRISM_2019_Talk.pdf}, bdsk-url-1 = {https://guillermofidalgo.github.io/assets/pdfs/ML4DQM_PRISM_2019_Talk.pdf}} @@ -614,7 +650,7 @@ @electronic{carp21 date-added = {2024-03-26 14:00:25 -0400}, date-modified = {2024-03-26 18:00:08 -0400}, keywords = {workshop}, - title = {Data Analysis for lab research}, + title = {Software Carpentry (Virtual)}, url = {https://indico.cern.ch/event/1097111/}, bdsk-url-1 = {https://indico.cern.ch/event/1097111/}} @@ -622,6 +658,7 @@ @electronic{carp22 author = {Guillermo Fidalgo}, date-added = {2024-03-26 14:00:25 -0400}, date-modified = {2024-03-26 18:00:08 -0400}, + date = {2022-09-28}, keywords = {workshop}, title = {Software Basics Training (Virtual)}, url = {https://indico.cern.ch/event/1190572/}, @@ -631,6 +668,7 @@ @electronic{carp22b author = {Guillermo Fidalgo}, date-added = {2024-03-26 14:00:25 -0400}, date-modified = {2024-03-26 18:00:08 -0400}, + date = {2022-03-28}, keywords = {workshop}, title = {Software Basics Training (Virtual)}, url = {https://indico.cern.ch/event/1112526/},