Skip to content

Commit

Permalink
Merge branch 'CMSSW_14_0_X' into scouting_nano_test_14_0_15
Browse files Browse the repository at this point in the history
  • Loading branch information
patinkaew authored Sep 10, 2024
2 parents 565b241 + 05fc563 commit 3678aaa
Show file tree
Hide file tree
Showing 49 changed files with 944 additions and 523 deletions.
143 changes: 85 additions & 58 deletions CondCore/Utilities/python/tier0.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import pycurl

tier0Url = 'https://cmsweb.cern.ch/t0wmadatasvc/prod/'
tier0Url = os.getenv('TIER0_API_URL', 'https://cmsweb.cern.ch/t0wmadatasvc/prod/')

class Tier0Error(Exception):
'''Tier0 exception.
Expand All @@ -23,7 +23,7 @@ def __init__(self, message):

def unique(seq, keepstr=True):
t = type(seq)
if t in (unicode, str):
if t is str:
t = (list, t('').join)[bool(keepstr)]
try:
remaining = set(seq)
Expand All @@ -38,44 +38,52 @@ def unique(seq, keepstr=True):
seen = []
return t(c for c in seq if not (c in seen or seen.append(c)))

#note: this exception seems unused
#note: this exception seems unused
class ResponseError( Tier0Error ):
    """Error raised for a bad/unexpected curl response from the Tier0DataSvc."""

    def __init__( self, curl, response, proxy, timeout, maxTime ):
        """
        Parameters:
        curl: pycurl object used for the connection (queried for URL and response code);
        response: raw response text received from the service;
        proxy: HTTP proxy used for the connection (empty/None if direct);
        timeout: connection timeout [seconds];
        maxTime: maximum total time allowed for the call [seconds].
        """
        super( ResponseError, self ).__init__( response )
        # curl ends up in self.args[1], proxy in self.args[-1]; __str__ relies on this
        self.args += ( curl, proxy )
        self.timeout = timeout
        self.maxTime = maxTime

    def __str__(self):
        errStr = f'Wrong response for curl connection to Tier0DataSvc'\
                 f' from URL "{self.args[1].getinfo(self.args[1].EFFECTIVE_URL)}"'
        if self.args[-1]:
            errStr += f' using proxy "{str(self.args[-1])}"'
        # bug fix: the attribute is self.maxTime (was self.maxtime, which would
        # raise AttributeError whenever this exception was stringified)
        errStr += f' with connection-timeout "{self.timeout}", max-time "{self.maxTime}"'\
                  f' with error code "{self.args[1].getinfo(self.args[1].RESPONSE_CODE)}".'
        if '<p>' in self.args[0]:
            # extract only the human-readable part of an HTML error page
            full_response = self.args[0].partition('<p>')[-1].rpartition('</p>')[0]
            errStr += f'\nFull response: "{full_response}".'
        else:
            errStr += f'\nFull response: "{self.args[0]}".'

        return errStr

#TODO: Add exceptions for each category of HTTP error codes
#TODO: check response code and raise corresponding exceptions

def _raise_http_error( curl, response, proxy, timeout ):
raise ResponseError( curl, response, proxy, timeout )
#note: this function seems to be unused
def _raise_http_error( curl, response, proxy, timeout, maxTime ):
    """Raise a ResponseError describing a failed curl query to the Tier0DataSvc.

    All arguments are forwarded unchanged to the ResponseError constructor.
    """
    raise ResponseError( curl, response, proxy, timeout, maxTime )

class Tier0Handler( object ):

def __init__( self, uri, timeOut, retries, retryPeriod, proxy, debug ):
def __init__( self, uri, timeOut, maxTime, retries, retryPeriod, proxy, debug ):
"""
Parameters:
uri: Tier0DataSvc URI;
timeOut: time out for Tier0DataSvc HTTPS calls;
timeOut: time out for connection of Tier0DataSvc HTTPS calls [seconds];
maxTime: maximum time for Tier0DataSvc HTTPS calls (including data transfer) [seconds];
retries: maximum retries for Tier0DataSvc HTTPS calls;
retryPeriod: sleep time between two Tier0DataSvc HTTPS calls;
retryPeriod: sleep time between two Tier0DataSvc HTTPS calls [seconds];
proxy: HTTP proxy for accessing Tier0DataSvc HTTPS calls;
debug: if set to True, enables debug information.
"""
self._uri = uri
self._timeOut = timeOut
self._maxTime = maxTime
self._retries = retries
self._retryPeriod = retryPeriod
self._proxy = proxy
Expand All @@ -90,51 +98,69 @@ def unsetDebug( self ):
def setProxy( self, proxy ):
self._proxy = proxy

def _queryTier0DataSvc( self, url ):
"""
Queries Tier0DataSvc.
url: Tier0DataSvc URL.
@returns: dictionary, from whence the required information must be retrieved according to the API call.
Raises if connection error, bad response, or timeout after retries occur.
"""
def _getCerts( self ) -> str:
cert_path = os.getenv('X509_USER_CERT', '')
key_path = os.getenv('X509_USER_KEY', '')

userAgent = "User-Agent: ConditionWebServices/1.0 python/%d.%d.%d PycURL/%s" % ( sys.version_info[ :3 ] + ( pycurl.version_info()[ 1 ], ) )

proxy = ""
if self._proxy: proxy = ' --proxy=%s ' % self._proxy

debug = " -s -S "
if self._debug: debug = " -v "
certs = ""
if cert_path:
certs += f' --cert {cert_path}'
else:
logging.warning("No certificate provided for Tier0 access, use X509_USER_CERT and"
" optionally X509_USER_KEY env variables to specify the path to the cert"
" (and the key unless included in the cert file)")
if key_path:
certs += f' --key {key_path}'
return certs

def _curlQueryTier0( self, url:str, force_debug:bool = False, force_cert:bool = False):
userAgent = "User-Agent: ConditionWebServices/1.0 python/%d.%d.%d PycURL/%s" \
% ( sys.version_info[ :3 ] + ( pycurl.version_info()[ 1 ], ) )
debug = "-v" if self._debug or force_debug else "-s -S"

proxy = f"--proxy {self._proxy}" if self._proxy else ""
certs = self._getCerts() if not self._proxy or force_cert else ""

cmd = '/usr/bin/curl -k -L --user-agent "%s" %s --connect-timeout %i --retry %i %s %s ' % (userAgent, proxy, self._timeOut, self._retries, debug, url)
cmd = f'/usr/bin/curl -k -L --user-agent "{userAgent}" {proxy}'\
f' --connect-timeout {self._timeOut} --max-time {self._maxTime} --retry {self._retries}'\
f' {debug} {url} {certs}'

# time the curl to understand if re-tries have been carried out
start = time.time()
process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(stdoutdata, stderrdata) = process.communicate()
retcode = process.returncode
end = time.time()
return process.returncode, stdoutdata, stderrdata, end-start

def _queryTier0DataSvc( self, url ):
"""
Queries Tier0DataSvc.
url: Tier0DataSvc URL.
@returns: dictionary, from whence the required information must be retrieved according to the API call.
Raises if connection error, bad response, or timeout after retries occur.
"""

retcode, stdoutdata, stderrdata, query_time = self._curlQueryTier0(url)

if retcode != 0 or stderrdata:

# if the first curl has failed, logg its stderror and prepare and independent retry
msg = "looks like curl returned an error: retcode=%s and took %s seconds" % (retcode,(end-start),)
msg += ' msg = "'+str(stderrdata)+'"'
logging.error(msg)

time.sleep(10)
cmd = '/usr/bin/curl -k -L --user-agent "%s" %s --connect-timeout %i --retry %i %s %s ' % (userAgent, proxy, self._timeOut, self._retries, "-v", url)
process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(stdoutdata, stderrdata) = process.communicate()
retcode = process.returncode
if retcode != 0:
msg = "looks like curl returned an error for the second time: retcode=%s" % (retcode,)
msg += ' msg = "'+str(stderrdata)+'"'
logging.error(msg)
raise Tier0Error(msg)
else :
msg = "curl returned ok upon the second try"
logging.info(msg)

# if the first curl has failed, logg its stderror and prepare and independent retry
msg = "looks like curl returned an error: retcode=%s and took %s seconds" % (retcode, query_time,)
msg += ' msg = "'+str(stderrdata)+'"'
logging.error(msg)
if self._proxy:
logging.info("before assumed proxy provides authentication, now trying with both proxy and certificate")

time.sleep(self._retryPeriod)
retcode, stdoutdata, stderrdata, query_time = self._curlQueryTier0(url, force_debug=True, force_cert=True)
if retcode != 0:
msg = "looks like curl returned an error for the second time: retcode=%s" % (retcode,)
msg += ' msg = "'+str(stderrdata)+'"'
logging.error(msg)
raise Tier0Error(msg)
else:
msg = "curl returned ok upon the second try"
logging.info(msg)
resp = json.loads( ''.join(stdoutdata.decode()).replace( "'", '"').replace(' None', ' "None"') )
return resp

Expand All @@ -149,7 +175,8 @@ def getFirstSafeRun( self ):
firstConditionSafeRunAPI = "firstconditionsaferun"
safeRunDict = self._queryTier0DataSvc( os.path.join( self._uri, firstConditionSafeRunAPI ) )
if safeRunDict is None:
errStr = """First condition safe run is not available in Tier0DataSvc from URL \"%s\"""" %( os.path.join( self._uri, firstConditionSafeRunAPI ), )
errStr = """First condition safe run is not available in Tier0DataSvc from URL \"%s\"""" \
%( os.path.join( self._uri, firstConditionSafeRunAPI ), )
if self._proxy:
errStr += """ using proxy \"%s\".""" %( str( self._proxy ), )
raise Tier0Error( errStr )
Expand All @@ -164,19 +191,20 @@ def getGlobalTag( self, config ):
Raises if connection error, bad response, timeout after retries occur, or if no Global Tags are available.
"""
data = self._queryTier0DataSvc( os.path.join( self._uri, config ) )
gtnames = sorted(unique( [ str( di[ 'global_tag' ] ) for di in data['result'] if di[ 'global_tag' ] is not None ] ))
gtnames = sorted(unique( [ str( di['global_tag'] ) for di in data['result'] if di['global_tag'] is not None ] ))
try:
recentGT = gtnames[-1]
return recentGT
except IndexError:
errStr = """No Global Tags for \"%s\" are available in Tier0DataSvc from URL \"%s\"""" %( config, os.path.join( self._uri, config ) )
errStr = """No Global Tags for \"%s\" are available in Tier0DataSvc from URL \"%s\"""" \
%( config, os.path.join( self._uri, config ) )
if self._proxy:
errStr += """ using proxy \"%s\".""" %( str( self._proxy ), )
raise Tier0Error( errStr )


def test( url ):
t0 = Tier0Handler( url, 1, 1, 1, None, debug=False)
t0 = Tier0Handler( url, 1, 5, 1, 10, None, debug=False)

print(' fcsr = %s (%s)' % (t0.getFirstSafeRun(), type(t0.getFirstSafeRun()) ))
print(' reco_config = %s' % t0.getGlobalTag('reco_config'))
Expand All @@ -186,4 +214,3 @@ def test( url ):

if __name__ == '__main__':
test( tier0Url )

3 changes: 2 additions & 1 deletion CondCore/Utilities/scripts/conddb
Original file line number Diff line number Diff line change
Expand Up @@ -706,12 +706,13 @@ def _get_hlt_fcsr( session, timeType ):

def _get_prompt_fcsr( session, timeType ):
tier0timeout = 5
tier0maxtime = 60
tier0retries = 3
tier0retryPeriod = 5
tier0proxy = None
try:
t0DataSvc = Tier0Handler( tier0Url,
tier0timeout, tier0retries, tier0retryPeriod,
tier0timeout, tier0maxtime, tier0retries, tier0retryPeriod,
tier0proxy, False )
try:
fcsr = t0DataSvc.getFirstSafeRun()
Expand Down
6 changes: 3 additions & 3 deletions Configuration/AlCa/python/autoCond.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
'run3_data_express' : '140X_dataRun3_Express_frozen_v1',
# GlobalTag for Run3 data relvals (prompt GT) - 140X_dataRun3_Prompt_v3 but snapshot at 2024-05-31 09:09:12 (UTC)
'run3_data_prompt' : '140X_dataRun3_Prompt_frozen_v3',
# GlobalTag for Run3 offline data reprocessing - snapshot at 2024-02-07 16:38:59 (UTC)
'run3_data' : '140X_dataRun3_v4',
# GlobalTag for Run3 offline data reprocessing with Prompt GT, currenlty for 2022FG - snapshot at 2024-02-12 12:00:00 (UTC)
# GlobalTag for Run3 offline data reprocessing - snapshot at 2024-09-04 16:25:09 (UTC)
'run3_data' : '140X_dataRun3_v9',
# GlobalTag for Run3 offline data reprocessing with Prompt GT, currently for 2022FG - snapshot at 2024-02-12 12:00:00 (UTC)
'run3_data_PromptAnalysis' : '140X_dataRun3_PromptAnalysis_v2',
# GlobalTag for MC production with perfectly aligned and calibrated detector for Phase1 2017 (and 0,0,~0-centred beamspot)
'phase1_2017_design' : '131X_mc2017_design_v3',
Expand Down
4 changes: 2 additions & 2 deletions Configuration/Eras/python/Era_Run3_2024_cff.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import FWCore.ParameterSet.Config as cms

from Configuration.Eras.Era_Run3_cff import Run3
from Configuration.Eras.Modifier_stage2L1Trigger_2024_cff import stage2L1Trigger_2024
from Configuration.Eras.Modifier_run3_scouting_nanoAOD_post2023_cff import run3_scouting_nanoAOD_post2023

# Run3 era for 2024 data-taking: baseline Run3 plus the 2024 Stage-2 L1 trigger
# modifier and the post-2023 scouting nanoAOD configuration
Run3_2024 = cms.ModifierChain(Run3, stage2L1Trigger_2024, run3_scouting_nanoAOD_post2023)
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import FWCore.ParameterSet.Config as cms

# Process modifier activating the 2024 Stage-2 L1 trigger configuration
stage2L1Trigger_2024 = cms.Modifier()
Original file line number Diff line number Diff line change
Expand Up @@ -1639,7 +1639,8 @@ def setup_(self, step, stepName, stepDict, k, properties):
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
},
harvest = {
'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'
'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM',
'--procModifiers': 'alpakaValidation',
},
suffix = 'Patatrack_PixelOnlyAlpaka_Validation',
offset = 0.403,
Expand Down
2 changes: 1 addition & 1 deletion DQM/EcalMonitorClient/python/IntegrityClient_cfi.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
kind = cms.untracked.string('TH2F'),
otype = cms.untracked.string('Ecal3P'),
btype = cms.untracked.string('Crystal'),
description = cms.untracked.string('Summary of the data integrity. A channel is red if more than ' + str(errFractionThreshold) + ' of its entries have integrity errors.')
description = cms.untracked.string('Summary of the data integrity. A channel is red if more than ' + str(errFractionThreshold) + ' of its entries have integrity errors. Also, an entire SuperModule can show red if more than 0.01 of its entries have DCC-SRP or DCC-TCC Desync errors.')
),
Quality = cms.untracked.PSet(
path = cms.untracked.string('%(subdet)s/%(prefix)sIntegrityClient/%(prefix)sIT data integrity quality %(sm)s'),
Expand Down
13 changes: 9 additions & 4 deletions DQM/EcalMonitorClient/src/IntegrityClient.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,15 +123,20 @@ namespace ecaldqm {
}
}

// Quality check: set an entire FED to BAD if "any" DCC-SRP or DCC-TCC mismatch errors are detected
// Quality check: set an entire FED to BAD if "any" DCC-SRP or DCC-TCC mismatch errors are detected AND the number of events affected by the DCC-SRP or DCC-TCC mismatch errors is more than 1% of the events analyzed in the run
// Fill mismatch statistics
MESet const& sBXSRP(sources_.at("BXSRP"));
MESet const& sBXTCC(sources_.at("BXTCC"));
std::vector<bool> hasMismatchDCC(nDCC, false);
for (unsigned iDCC(0); iDCC < nDCC; ++iDCC) {
if (sBXSRP.getBinContent(getEcalDQMSetupObjects(), iDCC + 1) > 50. ||
sBXTCC.getBinContent(getEcalDQMSetupObjects(), iDCC + 1) > 50.) // "any" => 50
hasMismatchDCC[iDCC] = true;
int nBXSRPdesync = sBXSRP.getBinContent(getEcalDQMSetupObjects(), iDCC + 1);
int nBXTCCdesync = sBXTCC.getBinContent(getEcalDQMSetupObjects(), iDCC + 1);

if (nBXSRPdesync > 50. || nBXTCCdesync > 50.) { // "any" => 50
if (nBXSRPdesync > int(0.01 * processedEvents) || nBXTCCdesync > int(0.01 * processedEvents)) { // check if the events with DCC-SRP or DCC-TCC desyncs for the given DCC is more than 1% of the events analyzed
hasMismatchDCC[iDCC] = true;
}
}
}
// Analyze mismatch statistics
for (MESet::iterator qsItr(meQualitySummary.beginChannel(GetElectronicsMap()));
Expand Down
28 changes: 28 additions & 0 deletions DQM/EcalMonitorTasks/python/RawDataTask_cfi.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,34 @@

ecalRawDataTask = cms.untracked.PSet(
MEs = cms.untracked.PSet(
TrendBXTCC = cms.untracked.PSet(
path = cms.untracked.string('Ecal/Trends/RawDataTask number of %(prefix)sRDT bunch crossing TCC errors'),
kind = cms.untracked.string('TH1F'),
otype = cms.untracked.string('Ecal2P'),
btype = cms.untracked.string('Trend'),
description = cms.untracked.string('Trend of the number of bunch crossing value mismatches between DCC and TCC.')
),
TrendL1ATCC = cms.untracked.PSet(
path = cms.untracked.string('Ecal/Trends/RawDataTask number of %(prefix)sRDT L1A TCC errors'),
kind = cms.untracked.string('TH1F'),
otype = cms.untracked.string('Ecal2P'),
btype = cms.untracked.string('Trend'),
description = cms.untracked.string('Trend of the number of L1A value mismatches between DCC and TCC.')
),
TrendBXSRP = cms.untracked.PSet(
path = cms.untracked.string('Ecal/Trends/RawDataTask number of %(prefix)sRDT bunch crossing SRP errors'),
kind = cms.untracked.string('TH1F'),
otype = cms.untracked.string('Ecal2P'),
btype = cms.untracked.string('Trend'),
description = cms.untracked.string('Trend of the number of bunch crossing value mismatches between DCC and SRP.')
),
TrendL1ASRP = cms.untracked.PSet(
path = cms.untracked.string('Ecal/Trends/RawDataTask number of %(prefix)sRDT L1A SRP errors'),
kind = cms.untracked.string('TH1F'),
otype = cms.untracked.string('Ecal2P'),
btype = cms.untracked.string('Trend'),
description = cms.untracked.string('Trend of the number of L1A value mismatches between DCC and SRP.')
),
BXSRP = cms.untracked.PSet(
path = cms.untracked.string('%(subdet)s/%(prefix)sRawDataTask/%(prefix)sRDT bunch crossing SRP errors'),
kind = cms.untracked.string('TH1F'),
Expand Down
Loading

0 comments on commit 3678aaa

Please sign in to comment.