diff --git a/Calibration/HcalCalibAlgos/macros/CalibCorr.C b/Calibration/HcalCalibAlgos/macros/CalibCorr.C index 6c8a2c3e12f2d..475f4d17667fe 100644 --- a/Calibration/HcalCalibAlgos/macros/CalibCorr.C +++ b/Calibration/HcalCalibAlgos/macros/CalibCorr.C @@ -256,10 +256,12 @@ public : ~CalibCorr() {} float getCorr(int run, unsigned int id); + double getCorr(const Long64_t& entry); private: void readCorrRun(const char* infile); void readCorrDepth(const char* infile); void readCorrResp(const char* infile); + void readCorrPU(const char* infile); unsigned int getDetIdHE(int ieta, int iphi, int depth); unsigned int getDetId(int subdet, int ieta, int iphi, int depth); unsigned int correctDetId(const unsigned int& detId); @@ -268,6 +270,7 @@ private: int flag_; bool debug_; std::map corrFac_[nmax_], corrFacDepth_, corrFacResp_; + std::map cfactors_; std::vector runlow_; }; @@ -382,6 +385,7 @@ CalibCorr::CalibCorr(const char* infile, int flag, bool debug) : << " for i/p file " << infile << std::endl; if (flag == 1) readCorrDepth(infile); else if (flag == 2) readCorrResp(infile); + else if (flag == 3) readCorrPU(infile); else readCorrRun(infile); } @@ -420,6 +424,13 @@ float CalibCorr::getCorr(int run, unsigned int id) { return cfac; } +double CalibCorr::getCorr(const Long64_t& entry) { + double cfac(-1.0); + std::map::iterator itr = cfactors_.find(entry); + if (itr != cfactors_.end()) cfac = itr->second; + return cfac; +} + void CalibCorr::readCorrRun(const char* infile) { std::cout << "Enters readCorrRun for " << infile << std::endl; @@ -574,6 +585,28 @@ void CalibCorr::readCorrResp(const char* infile) { } } +void CalibCorr::readCorrPU(const char* infile) { + + if (std::string(infile) != "") { + std::ifstream fInput(infile); + if (!fInput.good()) { + std::cout << "Cannot open file " << infile << std::endl; + } else { + double val1, val2; + cfactors_.clear(); + while (1) { + fInput >> val1 >> val2; + if (!fInput.good()) break; + Long64_t entry = (Long64_t)(val1); + 
cfactors_[entry] = val2; + } + fInput.close(); + } + } + std::cout << "Reads " << cfactors_.size() << " PU correction factors from " + << infile << std::endl; +} + unsigned int CalibCorr::getDetIdHE(int ieta, int iphi, int depth) { return getDetId(2,ieta,iphi,depth); } diff --git a/Calibration/HcalCalibAlgos/macros/CalibMonitor.C b/Calibration/HcalCalibAlgos/macros/CalibMonitor.C index 1fa7c0fa35254..ae6fc2542827f 100644 --- a/Calibration/HcalCalibAlgos/macros/CalibMonitor.C +++ b/Calibration/HcalCalibAlgos/macros/CalibMonitor.C @@ -59,9 +59,11 @@ // information (x=3/2/1/0 for having 1000/500/50/ // 100 bins for response distribution in (0:5); // m=1/0 for (not) making plots for each RBX; -// l=2/1/0 for type of rcorFileName (2 for overall -// response corrections; 1 for depth dependence -// corrections; 0 for raddam corrections); +// l=3/2/1/0 for type of rcorFileName (3 for +// pileup correction using machine learning +// method; 2 for overall response corrections; +// 1 for depth dependence corrections; +// 0 for raddam corrections); // t=1/0 for applying cut or not on L1 closeness; // h = 0/1/2 for not creating / creating in // recreate mode / creating in append mode @@ -267,13 +269,14 @@ public : virtual void Loop(); virtual Bool_t Notify(); virtual void Show(Long64_t entry = -1); - bool goodTrack (double &eHcal, double &cut, bool debug); + bool goodTrack (double &eHcal, double &cut, + const Long64_t& entry, bool debug); bool selectPhi(bool debug); void plotHist(int type, int num, bool save=false); template void drawHist(Hist*, TCanvas*); void savePlot(const std::string& theName, bool append, bool all=false); - void correctEnergy(double & ener); + void correctEnergy(double & ener, const Long64_t& entry); private: static const unsigned int npbin=5, kp50=2; @@ -290,7 +293,7 @@ private: bool exclude_, corrE_, cutL1T_, selRBX_; bool includeRun_; int coarseBin_, etamp_, etamn_, plotType_; - int flexibleSelect_; + int flexibleSelect_, ifDepth_; double log2by18_; 
std::ofstream fileout_; std::vector entries_; @@ -337,7 +340,7 @@ CalibMonitor::CalibMonitor(const char* fname, if (plotType_ < 0 || plotType_ > 3) plotType_ = 3; flexibleSelect_ = (((flag_/1) %10)); cutL1T_ = ((flag_/1000) %10); - int ifDepth = ((flag_/10000) %10); + ifDepth_ = ((flag_/10000) %10); selRBX_ = (((flag_/100000) %10) > 0); coarseBin_ = ((flag_/1000000) %10); log2by18_ = std::log(2.5)/18.0; @@ -357,7 +360,7 @@ CalibMonitor::CalibMonitor(const char* fname, << selRBX_ << " Vertex Range " << nvxlo_ << ":" << nvxhi_ << "\n corrFileName: " << corrFileName << " useScale " << useScale << ":" << scale << ":" << etam << "\n rcorFileName: " - << rcorFileName << " flag " << ifDepth << std::endl; + << rcorFileName << " flag " << ifDepth_ << std::endl; if (!fillChain(chain,fname)) { std::cout << "*****No valid tree chain can be obtained*****" << std::endl; } else { @@ -365,8 +368,11 @@ CalibMonitor::CalibMonitor(const char* fname, << " entries" << std::endl; corrFactor_ = new CalibCorrFactor(corrFileName,useScale,scale,etam,false); Init(chain,dupFileName,comFileName,outFName); - if (std::string(rcorFileName) != "") - cFactor_ = new CalibCorr(rcorFileName,ifDepth,false); + if (std::string(rcorFileName) != "") { + cFactor_ = new CalibCorr(rcorFileName,ifDepth_,false); + } else { + ifDepth_ = 0; + } if (rbx != 0) cSelect_ = new CalibSelectRBX(rbx, false); } } @@ -900,7 +906,7 @@ void CalibMonitor::Loop() { // Selection of good track and energy measured in Hcal double rat(1.0), eHcal(t_eHcal); - if (corrFactor_-> doCorr() || (cFactor_ != 0)) { + if (corrFactor_-> doCorr() || (cFactor_ != nullptr)) { eHcal = 0; for (unsigned int k=0; ksize(); ++k) { // The masks are defined in DataFormats/HcalDetId/interface/HcalDetId.h @@ -909,7 +915,8 @@ void CalibMonitor::Loop() { unsigned int id = truncateId((*t_DetIds)[k],truncateFlag_,false); cfac = corrFactor_->getCorr(id); } - if (cFactor_ != 0) cfac *= cFactor_->getCorr(t_Run,(*t_DetIds)[k]); + if ((cFactor_ != nullptr) && 
(ifDepth_ != 3)) + cfac *= cFactor_->getCorr(t_Run,(*t_DetIds)[k]); eHcal += (cfac*((*t_HitEnergies)[k])); if (debug) { int subdet,zside,ieta,iphi,depth; @@ -920,7 +927,7 @@ void CalibMonitor::Loop() { } } } - bool goodTk = goodTrack(eHcal, cut, debug); + bool goodTk = goodTrack(eHcal, cut, jentry, debug); bool selPhi = selectPhi(debug); if (p4060) { if (t_qltyFlag) { @@ -1064,7 +1071,7 @@ void CalibMonitor::Loop() { } } unsigned int k(0); - std::cout << "\nSummary of entries with " << runSum.size() << "runs\n"; + std::cout << "\nSummary of entries with " << runSum.size() << " runs\n"; for (std::map::iterator itr=runSum.begin(); itr != runSum.end(); ++itr, ++k) std::cout << "[" << k << "] Run " << itr->first << " Total " @@ -1072,7 +1079,7 @@ void CalibMonitor::Loop() { << ":" << (itr->second).count[1] << ":" << (itr->second).count[2] << ":" << (itr->second).count[3] << std::endl; k = 0; - std::cout << "\n" << runEn1.size() << " runs with 0 energy in HCAL\n"; + std::cout << runEn1.size() << " runs with 0 energy in HCAL\n"; for (std::map::iterator itr=runEn1.begin(); itr != runEn1.end(); ++itr, ++k) std::cout << "[" << k << "] Run " << itr->first << " Total " @@ -1080,8 +1087,7 @@ void CalibMonitor::Loop() { << ":" << (itr->second).count[1] << ":" << (itr->second).count[2] << ":" << (itr->second).count[3] << std::endl; k = 0; - std::cout << "\n" << runEn2.size() << " runs with 0 energy in ECAL and HCAL" - << std::endl; + std::cout << runEn2.size() << " runs with 0 energy in ECAL and HCAL\n"; for (std::map::iterator itr=runEn2.begin(); itr != runEn2.end(); ++itr, ++k) std::cout << "[" << k << "] Run " << itr->first << " Total " @@ -1104,7 +1110,8 @@ void CalibMonitor::Loop() { std::cout < 0) ? 
t_ieta : -t_ieta; cut = 8.0*exp(eta*log2by18_); } - correctEnergy(eHcal); + correctEnergy(eHcal, entry); select = ((t_qltyFlag) && (t_selectTk) && (t_hmaxNearP < cut) && (t_eMipDR < 1.0) && (eHcal > 0.001)); if (debug) { @@ -1281,10 +1288,15 @@ void CalibMonitor::savePlot(const std::string& theName, bool append, bool all) { theFile->Close(); } -void CalibMonitor::correctEnergy(double& eHcal) { +void CalibMonitor::correctEnergy(double& eHcal, const Long64_t& entry) { bool debug(false); double pmom = (useGen_ && (t_gentrackP>0)) ? t_gentrackP : t_p; - if ((corrPU_ < 0) && (pmom > 0)) { + if ((ifDepth_ == 3) && (cFactor_ != nullptr)) { + double cfac = cFactor_->getCorr(entry); + eHcal *= cfac; + if (debug) std::cout << "PU Factor for " << ifDepth_ << ":" << entry + << " = " << cfac << ":" << eHcal << std::endl; + } else if ((corrPU_ < 0) && (pmom > 0)) { double ediff = (t_eHcal30-t_eHcal10); if (t_DetIds1 != 0 && t_DetIds3 != 0) { double Etot1(0), Etot3(0); @@ -1913,11 +1925,12 @@ public : virtual void Loop(); virtual Bool_t Notify(); virtual void Show(Long64_t entry = -1); - bool goodTrack (double &eHcal, double &cut, bool debug); + bool goodTrack (double &eHcal, double &cut, + const Long64_t& entry, bool debug); bool selectPhi(bool debug); void savePlot(const std::string& theName, bool append, bool all=false); - void correctEnergy(double & ener); + void correctEnergy(double & ener, const Long64_t& entry); private: static const unsigned int npbin=5, kp50=2, ndepth=7; @@ -1931,6 +1944,7 @@ private: const int etalo_, etahi_; int runlo_, runhi_; const int phimin_,phimax_,zside_, nvxlo_, nvxhi_, rbx_; + int ifDepth_; bool exclude_, corrE_, cutL1T_; bool includeRun_, getHist_; int flexibleSelect_; @@ -1992,7 +2006,7 @@ CalibPlotProperties::CalibPlotProperties(const char* fname, plotBasic_ = (((flag_/10)%10) > 0); plotEnergy_ = (((flag_/10)%10) > 0); cutL1T_ = ((flag_/1000) %10); - int ifDepth = ((flag_/10000) %10); + ifDepth_ = ((flag_/10000) %10); plotHists_ = 
(((flag_/100000) %10) > 0); log2by18_ = std::log(2.5)/18.0; if (runlo_ < 0 || runhi_ < 0) { @@ -2012,7 +2026,7 @@ CalibPlotProperties::CalibPlotProperties(const char* fname, << rbx << " Vertex Range " << nvxlo_ << ":" << nvxhi_ << "\n corrFileName: " << corrFileName << " useScale " << useScale << ":" << scl << ":" << etam << "\n rcorFileName: " - << rcorFileName << " flag " << ifDepth << std::endl; + << rcorFileName << " flag " << ifDepth_ << std::endl; if (!fillChain(chain,fname)) { std::cout << "*****No valid tree chain can be obtained*****" << std::endl; } else { @@ -2020,8 +2034,11 @@ CalibPlotProperties::CalibPlotProperties(const char* fname, << " entries" << std::endl; Init(chain,dupFileName); corrFactor_ = new CalibCorrFactor(corrFileName,useScale,scl,etam,false); - if (std::string(rcorFileName) != "") - cFactor_ = new CalibCorr(rcorFileName,ifDepth,false); + if (std::string(rcorFileName) != "") { + cFactor_ = new CalibCorr(rcorFileName,ifDepth_,false); + } else { + ifDepth_ = 0; + } if (rbx != 0) cSelect_ = new CalibSelectRBX(rbx, false); } } @@ -2444,7 +2461,7 @@ void CalibPlotProperties::Loop() { } } } - bool goodTk = goodTrack(eHcal, cut, debug); + bool goodTk = goodTrack(eHcal, cut, jentry, debug); bool selPhi = selectPhi(debug); double rat = (pmom > 0) ? 
(eHcal/(pmom-t_eMipDR)) : 1.0; if (debug) @@ -2517,7 +2534,7 @@ void CalibPlotProperties::Loop() { if (cFactor_ != 0) cfac *= cFactor_->getCorr(t_Run,(*t_DetIds)[k]); double ener = cfac*(*t_HitEnergies)[k]; - if (corrPU_) correctEnergy(ener); + if (corrPU_) correctEnergy(ener, jentry); if (ener < 0.001) bad = true; } if ((!bad) && (std::fabs(rat-1)<0.15) && (kp == kp50) && @@ -2535,7 +2552,7 @@ void CalibPlotProperties::Loop() { if (cFactor_ != 0) cfac *= cFactor_->getCorr(t_Run,(*t_DetIds)[k]); double ener = cfac*(*t_HitEnergies)[k]; - if (corrPU_) correctEnergy(ener); + if (corrPU_) correctEnergy(ener, jentry); unsigned int idx = (unsigned int)((*t_DetIds)[k]); int subdet, zside, ieta, iphi, depth; unpackDetId(idx,subdet,zside,ieta,iphi,depth); @@ -2581,7 +2598,8 @@ void CalibPlotProperties::Loop() { << " HE " << selHE << std::endl; } -bool CalibPlotProperties::goodTrack(double& eHcal, double &cuti, bool debug) { +bool CalibPlotProperties::goodTrack(double& eHcal, double &cuti, + const Long64_t& entry, bool debug) { bool select(true); double cut(cuti); @@ -2592,7 +2610,7 @@ bool CalibPlotProperties::goodTrack(double& eHcal, double &cuti, bool debug) { double eta = (t_ieta > 0) ? t_ieta : -t_ieta; cut = 8.0*exp(eta*log2by18_); } - correctEnergy(eHcal); + correctEnergy(eHcal, entry); select = ((t_qltyFlag) && (t_selectTk) && (t_hmaxNearP < cut) && (t_eMipDR < 100.0) && (eHcal > 0.001)); if (debug) { @@ -2699,10 +2717,13 @@ void CalibPlotProperties::savePlot(const std::string& theName, bool append, theFile->Close(); } -void CalibPlotProperties::correctEnergy(double& eHcal) { +void CalibPlotProperties::correctEnergy(double& eHcal, const Long64_t& entry) { double pmom = (useGen_ && (t_gentrackP>0)) ? 
t_gentrackP : t_p; - if ((corrPU_ < 0) && (pmom > 0)) { + if ((ifDepth_ == 3) && (cFactor_ != nullptr)) { + double cfac = cFactor_->getCorr(entry); + eHcal *= cfac; + } else if ((corrPU_ < 0) && (pmom > 0)) { double ediff = (t_eHcal30-t_eHcal10); if (t_DetIds1 != 0 && t_DetIds3 != 0) { double Etot1(0), Etot3(0); diff --git a/Calibration/HcalCalibAlgos/macros/CalibTree.C b/Calibration/HcalCalibAlgos/macros/CalibTree.C index 6f03419354c05..91c716c77762f 100644 --- a/Calibration/HcalCalibAlgos/macros/CalibTree.C +++ b/Calibration/HcalCalibAlgos/macros/CalibTree.C @@ -57,7 +57,8 @@ // maxIter (int) = number of iterations (30) // rcorForm (int) = type of rcorFileName: (0) for Raddam correction, // (1) for depth dependent corrections; (2) for -// RespCorr corrections; (0) +// RespCorr corrections; (3) use machine learning +// method for pileup correction. (Default 0) // useGen (bool) = use generator level momentum information (false) // runlo (int) = lower value of run number to be included (+ve) // or excluded (-ve) (default 0) @@ -165,7 +166,7 @@ public : bool useWeight, double fraction, bool debug); void fitPol0(TH1D* hist, bool debug); void highEtaFactors(int ietaMax, bool debug); - energyCalor energyHcal(double pmom, bool final); + energyCalor energyHcal(double pmom, const Long64_t& entry, bool final); TChain *fChain; //!pointer to the analyzed TTree or TChain Int_t fCurrent;//!current Tree number in a TChain @@ -262,7 +263,7 @@ private: const int phimin_, phimax_; const int zside_, nvxlo_, nvxhi_; const int sysmode_, rbx_, puCorr_; - const int rcorForm_; + int rcorForm_; const bool useGen_, exclude_; const int higheta_; bool includeRun_; @@ -312,6 +313,8 @@ void Run(const char *inFileName, const char *dirName, const char *treeName, Long64_t nentryTot = chain->GetEntries(); Long64_t nentries = (fraction > 0.01 && fraction < 0.99) ? 
(Long64_t)(fraction*nentryTot) : nentryTot; + static const int maxIterMax = 100; + if (maxIter > maxIterMax) maxIter = maxIterMax; std::cout << "Tree " << name << " " << chain << " in directory " << dirName << " from file " << inFileName << " with nentries (tracks): " << nentries << std::endl; @@ -331,7 +334,7 @@ void Run(const char *inFileName, const char *dirName, const char *treeName, << std::endl; fout->cd(); - double cvgs[100], itrs[100]; + double cvgs[maxIterMax], itrs[maxIterMax]; for (; k<=kmax; ++k) { std::cout << "Calling Loop() " << k << "th time" << std::endl; double cvg = t.Loop(k, fout, useweight, nMin, inverse, ratMin, ratMax, @@ -391,8 +394,11 @@ CalibTree::CalibTree(const char *dupFileName, const char* rcorFileName, << " Treat RBX " << rbx_ << " with exclusion mode " << exclude_ << std::endl; Init(tree, dupFileName); - if (std::string(rcorFileName) != "") + if (std::string(rcorFileName) != "") { cFactor_ = new CalibCorr(rcorFileName,rcorForm_,false); + } else { + rcorForm_ = 0; + } if (rbx != 0) cSelect_ = new CalibSelectRBX(rbx); } @@ -623,9 +629,8 @@ Double_t CalibTree::Loop(int loop, TFile *fout, bool useweight, int nMin, sprintf (title, "Correction for Subdet %d #eta %d depth %d (Loop %d)", subdet, zside*ieta, depth, loop); TH1D* hist = new TH1D(name,title,100, 0.0, 5.0); hist->Sumw2(); - if (debug) { + if (debug) std::cout << "Book Histo " << k << " " << title << std::endl; - } histos[detIds[k]] = hist; } std::cout << "Total of " << detIds.size() << " detIds and " << histos.size() @@ -667,7 +672,7 @@ Double_t CalibTree::Loop(int loop, TFile *fout, bool useweight, int nMin, double pmom = (useGen_ && (t_gentrackP > 0)) ? t_gentrackP : t_p; if (goodTrack()) { ++ntkgood; - CalibTree::energyCalor en = energyHcal(pmom, true); + CalibTree::energyCalor en = energyHcal(pmom, jentry, true); double evWt = (useweight) ? t_EventWeight : 1.0; if (en.ehcal > 0.001) { double pufac = (en.Etot > 0) ? 
(en.ehcal/en.Etot) : 1.0; @@ -732,10 +737,9 @@ Double_t CalibTree::Loop(int loop, TFile *fout, bool useweight, int nMin, } } } - if (debug) { + if (debug) std::cout << "# of Good Tracks " << ntkgood << " out of " << nentries << std::endl; - } if (loop==0) { h_pbyE->Write("h_pbyE"); h_Ebyp_bfr->Write("h_Ebyp_bfr"); @@ -763,6 +767,7 @@ Double_t CalibTree::Loop(int loop, TFile *fout, bool useweight, int nMin, } } + if (debug) std::cout << "Histos with " << histos.size() << " entries\n"; for (std::map::const_iterator itr = histos.begin(); itr != histos.end(); ++itr,++kount) { std::pair result = fitMean(itr->second, 0); @@ -784,6 +789,7 @@ Double_t CalibTree::Loop(int loop, TFile *fout, bool useweight, int nMin, } } + if (debug) std::cout << "SumW with " << SumW.size() << " entries\n"; std::map::const_iterator SumWItr = SumW.begin(); for (; SumWItr != SumW.end(); SumWItr++) { unsigned int detid = SumWItr->first; @@ -815,7 +821,9 @@ Double_t CalibTree::Loop(int loop, TFile *fout, bool useweight, int nMin, } } - double dets[150], cfacs[150], wfacs[150], myId[150], nTrk[150]; + static const int maxch = 500; + double dets[maxch], cfacs[maxch], wfacs[maxch], myId[maxch], nTrk[maxch]; + std::cout << "cafctors: " << cfactors.size() << ":" << maxch << std::endl; kount = 0; std::map >::const_iterator itr=cfactors.begin(); for (; itr !=cfactors.end(); ++itr,++kount) { @@ -1060,19 +1068,21 @@ void CalibTree::makeplots(double rmin, double rmax, int ietaMax, nb = fChain->GetEntry(jentry); nbytes += nb; if (goodTrack()) { double pmom = (useGen_ && (t_gentrackP > 0)) ? t_gentrackP : t_p; - CalibTree::energyCalor en1 = energyHcal(pmom, false); - CalibTree::energyCalor en2 = energyHcal(pmom, true); - double evWt = (useweight) ? 
t_EventWeight : 1.0; - double ratioi = en1.ehcal/(pmom-t_eMipDR); - double ratiof = en2.ehcal/(pmom-t_eMipDR); - if (t_ieta >= -ietaMax && t_ieta <= ietaMax && t_ieta != 0) { - if (ratioi>=rmin && ratioi<=rmax) { - histos[0].first->Fill(ratioi,evWt); - histos[t_ieta].first->Fill(ratioi,evWt); - } - if (ratiof>=rmin && ratiof<=rmax) { - histos[0].second->Fill(ratiof,evWt); - histos[t_ieta].second->Fill(ratiof,evWt); + CalibTree::energyCalor en1 = energyHcal(pmom, jentry, false); + CalibTree::energyCalor en2 = energyHcal(pmom, jentry, true); + if ((en1.ehcal > 0.001) && (en2.ehcal > 0.001)) { + double evWt = (useweight) ? t_EventWeight : 1.0; + double ratioi = en1.ehcal/(pmom-t_eMipDR); + double ratiof = en2.ehcal/(pmom-t_eMipDR); + if (t_ieta >= -ietaMax && t_ieta <= ietaMax && t_ieta != 0) { + if (ratioi>=rmin && ratioi<=rmax) { + histos[0].first->Fill(ratioi,evWt); + histos[t_ieta].first->Fill(ratioi,evWt); + } + if (ratiof>=rmin && ratiof<=rmax) { + histos[0].second->Fill(ratiof,evWt); + histos[t_ieta].second->Fill(ratiof,evWt); + } } } } @@ -1166,7 +1176,9 @@ void CalibTree::highEtaFactors(int ietaMax, bool debug) { } } -CalibTree::energyCalor CalibTree::energyHcal(double pmom, bool final) { +CalibTree::energyCalor CalibTree::energyHcal(double pmom, + const Long64_t& entry, + bool final) { double etot = t_eHcal; double etot2 = t_eHcal; @@ -1220,8 +1232,9 @@ CalibTree::energyCalor CalibTree::energyHcal(double pmom, bool final) { ediff = etot3-etot1; } // PU correction only for loose isolation cut - double ehcal = ((puCorr_ == 0) ? etot : + double ehcal = (((rcorForm_ == 3) && (cFactor_ != nullptr)) ? + (etot * cFactor_->getCorr(entry)) : ((puCorr_ == 0) ? etot : ((puCorr_ < 0) ? 
(etot*puFactor(-puCorr_,t_ieta,pmom,etot,ediff)) : - puFactorRho(puCorr_,t_ieta,t_rhoh,etot))); + puFactorRho(puCorr_,t_ieta,t_rhoh,etot)))); return CalibTree::energyCalor(etot,etot2,ehcal); } diff --git a/Calibration/HcalCalibAlgos/macros/isotrackApplyRegressor.py b/Calibration/HcalCalibAlgos/macros/isotrackApplyRegressor.py new file mode 100644 index 0000000000000..b285eadddb93d --- /dev/null +++ b/Calibration/HcalCalibAlgos/macros/isotrackApplyRegressor.py @@ -0,0 +1,112 @@ +###################################################################################### +# Evaluates regressor from loaded model +# Usage: +# python3 isotrackApplyRegressor.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -M ./models/model1.h5 -O corrfac_regression.txt +###################################################################################### +# coding: utf-8 + +# In[1]: + + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import argparse +import keras +from keras.models import Sequential +from keras.layers import Dense +from keras.layers import Dropout +from keras.utils.np_utils import to_categorical +from keras.utils import plot_model +from keras import regularizers +from sklearn.metrics import roc_curve, auc +from keras.layers import Activation +from keras import backend as K +from keras.models import load_model +import uproot + + +# In[2]: + +parser = argparse.ArgumentParser() +parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root") + +parser.add_argument("-M", "--modelname",help="model file name",default="./models/model.h5") +parser.add_argument("-O", "--opfilename",help="output text file name",default="corrfac_regression.txt") + + +fName1 = parser.parse_args().filePU +modeln = parser.parse_args().modelname +foutput = parser.parse_args().opfilename + + + +#fName1='/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root' +tree1 = 
uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree'] +print ("loaded files") + +branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh'] +dictpu = tree1.arrays(branches=branchespu) +dfspu = pd.DataFrame.from_dict(dictpu) +dfspu.columns=branchespu +print ("sample size:",dfspu.shape[0]) + + +# In[3]: + + +dfspu['t_delta']=dfspu['t_eHcal30']-dfspu['t_eHcal10'] +keepvars =['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt','t_eHcal', 't_p', 't_eMipDR'] + +df = dfspu[keepvars].copy() +df['t_eHcal_xun'] = df['t_eHcal'] +df['t_delta_un'] = df['t_delta'] +df['t_ieta_un'] = df['t_ieta'] + +#cols_to_stand = ['t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x'] +#cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt'] +cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal'] +#df[cols_to_stand] = df[cols_to_stand].apply(lambda x: (x - x.mean()) /(x.std())) +#df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.mean()) / (x.max() - x.min())) +# #(x.max() - x.min())) +df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min())) + + +uncorrected_values = df['t_eHcal_xun'].values +print (uncorrected_values.shape) +print ('data vars:',df.keys()) +data = df.values +X_train = data[:,0:12] + + +# In[4]: + + +model = load_model(modeln) +preds = model.predict(X_train,verbose=1) +preds = preds.reshape(preds.shape[0]) +print (preds.shape) + + +# In[5]: + + +plt.hist(preds, bins =100, range=(0,100),label='predicted enerhy',alpha=0.6) +plt.savefig('predicted_Edist.png') + + +# In[6]: + + +eventnumarray = 
np.arange(0,X_train.shape[0],1,dtype=int) +eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1) +corrfac = preds/uncorrected_values +corrfac = corrfac.reshape(corrfac.shape[0],1) +fileo = np.hstack((eventnumarray,corrfac)) + + +# In[ ]: + + +np.savetxt(foutput, fileo) + diff --git a/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py b/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py new file mode 100644 index 0000000000000..de33ab542d99f --- /dev/null +++ b/Calibration/HcalCalibAlgos/macros/isotrackNtupler.py @@ -0,0 +1,83 @@ +###################################################################################### +# Makes pkl and text files comparing PU and noPU samples for training regressor and other stuff +# Usage: +# python3 isotrackNtupler.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -NPU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root -O isotk_relval +###################################################################################### + + + +import uproot +import numpy as np +import pandas as pd +import argparse +import matplotlib.pyplot as plt + +parser = argparse.ArgumentParser() +parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root") +parser.add_argument("-NPU", "--fileNPU",help="input no PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root") +parser.add_argument("-O", "--opfilename",help="ouput file name",default="isotk_relval") + + +fName1 = parser.parse_args().filePU +fName2 = parser.parse_args().fileNPU +foutput = parser.parse_args().opfilename + + +# PU +tree1 = uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree'] + +#no PU +tree2 = uproot.open(fName2,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree'] +#tree2.keys() +print ("loaded files") +branchespu = 
['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_selectTk','t_qltyFlag'] +branchesnpu = ['t_Event','t_ieta','t_iphi','t_eHcal'] +#dictn = tree.arrays(branches=branches,entrystart=0, entrystop=300) +dictpu = tree1.arrays(branches=branchespu) +dictnpu = tree2.arrays(branches=branchesnpu) +dfspu = pd.DataFrame.from_dict(dictpu) +dfspu.columns=branchespu +dfsnpu = pd.DataFrame.from_dict(dictnpu) +dfsnpu.columns=branchesnpu +print ("loaded dicts and dfs") +print ("PU sample size:",dfspu.shape[0]) +print ("noPU sample size:",dfsnpu.shape[0]) +merged = pd.merge(dfspu, dfsnpu , on=['t_Event','t_ieta','t_iphi']) +print ("selected common events before cut:",merged.shape[0]) + +#cuts = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<10)&(merged['t_eMipDR_y']<1) +keepvars = ['t_nVtx','t_ieta','t_eHcal10','t_eHcal30','t_delta','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_pt','t_eHcal_x','t_eHcal_y','t_p','t_eMipDR'] + + + +#########################all ietas +cuts1 = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<20)&(merged['t_eMipDR']<1)&(abs(merged['t_p'] - 50)<10)&(merged['t_eHcal_x']>10) +merged1=merged.loc[cuts1] +merged1 = merged1.reset_index(drop=True) +print ("selected events after cut for all ietas:",merged1.shape[0]) +merged1['t_delta']=merged1['t_eHcal30']-merged1['t_eHcal10'] +final_df_all = merged1[keepvars] +final_df_all.to_pickle(foutput+"_all.pkl") +final_df_all.to_csv(foutput+"_all.txt") +#########################split ieta < 16 + +cuts2 = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<20)&(merged['t_eMipDR']<1)&(abs(merged['t_ieta'])<16)&(abs(merged['t_p'] - 50)<10)&(merged['t_eHcal_x']>10) +merged2=merged.loc[cuts2] +merged2 = merged2.reset_index(drop=True) +print ("selected events after cut for ieta < 16:",merged2.shape[0]) 
+merged2['t_delta']=merged2['t_eHcal30']-merged2['t_eHcal10'] +final_df_low = merged2[keepvars] +final_df_low.to_pickle(foutput+"_lo.pkl") +final_df_low.to_csv(foutput+"_lo.txt") + +#########################split ieta > 15 + +cuts3 = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<20)&(merged['t_eMipDR']<1)&(abs(merged['t_ieta'])>15)&(abs(merged['t_p'] - 50)<10)&(merged['t_eHcal_x']>10) +merged3=merged.loc[cuts3] +merged3 = merged3.reset_index(drop=True) +print ("selected events after cut for ieta > 15:",merged3.shape[0]) +merged3['t_delta']=merged3['t_eHcal30']-merged3['t_eHcal10'] +final_df_hi = merged3[keepvars] +final_df_hi.to_pickle(foutput+"_hi.pkl") +final_df_hi.to_csv(foutput+"_hi.txt") + diff --git a/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressor.py b/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressor.py new file mode 100644 index 0000000000000..bfcfca40200d6 --- /dev/null +++ b/Calibration/HcalCalibAlgos/macros/isotrackTrainRegressor.py @@ -0,0 +1,219 @@ +###################################################################################### +# Trains regressor and saves model for evaluation +# Usage: +# python3 isotrackTrainRegressor.py -I isotk_relval_hi.pkl -V 1 +###################################################################################### + + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import argparse +import keras +from keras.models import Sequential +from keras.layers import Dense +from keras.layers import Dropout +from keras.utils.np_utils import to_categorical +from keras.utils import plot_model +from keras import regularizers +from sklearn.metrics import roc_curve, auc +from keras.layers import Activation +from keras import backend as K +from keras.models import save_model + + + +parser = argparse.ArgumentParser() +parser.add_argument("-I", "--input",help="input file",default="isotk_relval_hi.pkl") +parser.add_argument("-V", "--modelv",help="model version (any number) 
",default="1") + + + +fName = parser.parse_args().input +modv = parser.parse_args().modelv +# In[2]: + + +df = pd.read_pickle(fName) +print ("vars in file:",df.keys()) +df = df.loc[df['t_eHcal_y'] > 20] +df['t_eHcal_xun'] = df['t_eHcal_x'] +df['t_delta_un'] = df['t_delta'] +df['t_ieta_un'] = df['t_ieta'] +#cols_to_stand = ['t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x'] +#cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt'] +cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x'] +#df[cols_to_stand] = df[cols_to_stand].apply(lambda x: (x - x.mean()) /(x.std())) +#df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.mean()) / (x.max() - x.min())) +# #(x.max() - x.min())) +df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min())) + +data = df.values +print ('data shape:',data.shape) +targets = data[:,12] +targets.shape + + +# In[3]: + + +data = df.values +ntest = 20000 +testindx = data.shape[0] - ntest +X_train = data[:testindx,0:12] +Y_train = data[:testindx,12] +X_test = data[testindx:,:] +print ("shape of X_train:",X_train.shape) +print ("shape of Y_train:",Y_train.shape) +print ("shape of X_test:",X_test.shape) +meany = np.mean(Y_train) +print ("mean y:",meany) +stdy = np.std(Y_train) +print ("std y:", stdy) + + +# In[4]: + + +############################################# marinas correction form +a0 = [0.973, 0.998, 0.992, 0.965 ] +a1 = [0, -0.318, -0.261, -0.406] +a2 = [0, 0, 0.047, 0.089] +def fac0(jeta): + PU_IETA_1 = 9 + PU_IETA_2 = 16 + PU_IETA_3 = 25 + idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)) + return a0[idx] +def fac1(jeta): + PU_IETA_1 = 9 + PU_IETA_2 = 16 + PU_IETA_3 = 25 + idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)) + return a1[idx] +def fac2(jeta): + PU_IETA_1 = 9 + PU_IETA_2 = 
16 + PU_IETA_3 = 25 + idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3)) + return a2[idx] + +vec0 = np.vectorize(fac0) +vec1 = np.vectorize(fac1) +vec2 = np.vectorize(fac2) + +X_test[:,17] +eop = (X_test[:,15]/X_test[:,13]) +dop = (X_test[:,16]/X_test[:,13]) +#mcorrval = vec0(abs(X_test[:,17])) + vec1(abs(X_test[:,17]))*(X_test[:,15]/X_test[:,13])*(X_test[:,16]/X_test[:,13])*( 1 + vec2(fac(abs(X_test[:,17])))*(X_test[:,16]/X_test[:,13])) + +mcorrval = vec0(abs(X_test[:,17])) + vec1(abs(X_test[:,17]))*eop*dop*( 1 + vec2(abs(X_test[:,17]))*dop) + + +# In[5]: + + +def propweights(y_true): + weight = np.copy(y_true) + weight[abs(y_true - meany) > 0] = 1.90*abs(y_true - meany)/stdy #1.25 +# weight[abs(y_true - meany) > stdy] = 1.75*abs((weight[abs(y_true - meany) > stdy]) - meany)/(stdy) + weight[abs(y_true - meany) < stdy] = 1 + return weight + + +# In[6]: + + +from keras import optimizers +print ("creating model=========>") +model = Sequential() +model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu')) +model.add(Dropout(0.2)) +model.add(Dense(500, activation='relu',kernel_regularizer=regularizers.l2(0.01))) +model.add(Dense(500, activation='relu',kernel_regularizer=regularizers.l2(0.01))) +model.add(Dense(60, activation='relu',kernel_regularizer=regularizers.l2(0.01))) +model.add(Dense(1)) + +RMS = keras.optimizers.RMSprop(lr=0.001, rho=0.9) +# Compile model +print ("compilation up next=======>") +model.compile(loss='logcosh',optimizer='adam') +model.summary() +#fitting +print ("fitting now=========>") +history = model.fit(X_train,Y_train , batch_size=5000, epochs=1000, validation_split=0.2, verbose=1,sample_weight=propweights(Y_train)) + + +# In[7]: + + +preds = model.predict(X_test[:,0:12]) +targets = X_test[:,12] +uncorrected = X_test[:,15] +marinascorr = X_test[:,15]*mcorrval +plt.hist(preds, bins =100, range=(0,200),label='PU regression',alpha=0.6) +plt.hist(targets, bins =100, range=(0,200),label='no PU',alpha=0.6) 
# Finish the energy-distribution plot started just above and write it out.
plt.hist(uncorrected, bins =100, range=(0,200),label='uncorrected',alpha=0.6)
#plt.hist(marinascorr, bins =100, range=(0,200),label='marinas correction',alpha=0.6)
plt.yscale('log')
plt.title("Energy distribution")
plt.legend(loc='upper right')
plt.savefig('energy_distributions.png')


# Flatten the predictions to one dimension so they combine element-wise with
# the 1-D target column below.
preds = preds.reshape(preds.shape[0])
print (preds.shape)



# Relative error (in percent) with respect to the target.
# A new figure is opened first: without it this plot would be drawn on top of
# the energy histograms and inherit their log y-scale.
plt.figure()
#plt.hist(preds, bins =100, range=(0,100),label='PU regression',alpha=0.6)
#plt.hist(abs(preds -targets)/targets , bins =100, range=(0,40),label='PU regression',alpha=0.6)
#plt.hist(targets, bins =100, range=(0,100),label='no PU',alpha=0.6)
plt.hist(abs(uncorrected -targets)/targets*100, bins =100, range=(0,100),label='uncorrected',alpha=0.6)
#plt.hist(abs(marinascorr -targets)/targets*100, bins =100, range=(0,100),label='marinas correction',alpha=0.6)
plt.hist(100*abs(preds -targets)/targets, bins =100, range=(0,100),label='PU correction',alpha=0.6)
#plt.yscale('log')
plt.title("error distribution")
plt.legend(loc='upper right')
plt.savefig('errors.png')


# Scatter of predicted / uncorrected values against the target, again on its
# own figure so the histograms above do not leak into it.
plt.figure()
#plt.scatter(targets, marinascorr,alpha=0.3,label='marinascorr')
plt.scatter(targets, uncorrected,alpha=0.3,label='uncorr')
plt.scatter(targets, preds,alpha=0.3,label='PUcorr')
plt.xlabel('True Values ')
plt.ylabel('Predictions ')
plt.legend(loc='upper right')
lims = [0, 200]
plt.xlim(lims)
plt.ylim(lims)
#_ = plt.plot(lims, lims)
plt.savefig('energyscatt.png')




# E/p distributions; the legend, scale, title and savefig for this figure
# follow after this section.
pmom= X_test[:,13]
plt.figure()
#get_ipython().run_line_magic('matplotlib', 'inline')
plt.hist(targets/pmom, bins =100, range=(0,5),label='E/p noPU',alpha=0.6)
#plt.hist(preds/pmom, bins =100, range=(0,5),label='E/p PUcorr',alpha=0.6)
#plt.hist(uncorrected/pmom, bins =100, range=(0,5),label='E/p uncorr',alpha=0.6)
plt.hist(marinascorr/pmom, bins =100, range=(0,5),label='E/p marina corr',alpha=0.6)
#plt.hist(np.exp(preds.reshape((preds.shape[0])))/pmom[0:n_test_events], bins =100, range=(0,5),label='corrEnp/p',alpha=0.3)
#plt.hist(preds.reshape((preds.shape[0]))/pmom[0:n_test_events], bins =100, range=(0,5),label='corrEnp/p',alpha=0.3)
# Decorate the current (E/p) figure and write it to disk.
plt.title("E/p distributions")
plt.yscale('log')
plt.legend(loc='upper right')
plt.savefig('eopdist.png')


# In[8]:


import os

# Persist the trained network under models/, creating the directory on first
# use.  `modv` is a suffix defined outside this section.
model_dir = 'models'
model_path = 'models/model'+modv+'.h5'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
model.save(model_path)
#new_model_2 = load_model('my_model.h5')