-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a tool to convert data to the .raw format used as input by the HLT
- Loading branch information
Showing
3 changed files
with
361 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# convertToRaw | ||
|
||
Convert RAW data stored in one or more EDM .root files into the .raw file used as input by the HLT. | ||
|
||
``` | ||
usage: convertToRaw [-h] [-o PATH] [-f EVENTS] [-l EVENTS] [--one-file-per-lumi] FILES [FILES ...] | ||
Convert RAW data from .root format to .raw format. | ||
positional arguments: | ||
FILES input files in .root format | ||
optional arguments: | ||
-h, --help show this help message and exit | ||
-o PATH, --output PATH | ||
base path to store the output files; subdirectories based on the run number are automatically created (default: ) | ||
-f EVENTS, --events_per_file EVENTS | ||
split the output into files with at most EVENTS events (default: 50) | ||
-l EVENTS, --events_per_lumi EVENTS | ||
process at most EVENTS events in each lumisection (default: 11650) | ||
--one-file-per-lumi assume that lumisections are not split across files (and disable --events_per_lumi) (default: False) | ||
``` | ||
|
||
The default behaviour is to process a single luminosity section at a time, in order to support luminosity sections split across multiple files and a limit on the number of events in each lumisection. | ||
|
||
If neither of these features is needed (_i.e._ if lumisections are not split, and all events should be converted) the `--one-file-per-lumi` option can be used to process all data with a single job, speeding up the conversion considerably.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
# CMSSW configuration to convert EDM .root RAW data into the .raw file format
# used as input by the HLT. The converted .raw data will appear under a
# per-run subdirectory of the output path, e.g.
#   store/raw/Run2022A/MinimumBias/RAW/v1/000/run353087

import sys
import os
import FWCore.ParameterSet.Config as cms
import FWCore.ParameterSet.VarParsing as VarParsing

# a dummy process name: this job only repacks existing RAW data
process = cms.Process("FAKE")

process.maxEvents = cms.untracked.PSet(
    input = cms.untracked.int32(-1) # to be overwritten after parsing the command line options
)

process.source = cms.Source("PoolSource",
    fileNames = cms.untracked.vstring() # to be overwritten after parsing the command line options
)

# the EvFDaqDirector service manages the DAQ-style output directory layout
# (run directories, jsd files, EoLS/EoR json files) used by the writer below
process.EvFDaqDirector = cms.Service( "EvFDaqDirector",
    runNumber = cms.untracked.uint32( 0 ), # to be overwritten after parsing the command line options
    baseDir = cms.untracked.string( "" ), # to be overwritten after parsing the command line options
    buBaseDir = cms.untracked.string( "" ), # to be overwritten after parsing the command line options
    useFileBroker = cms.untracked.bool( False ),
    fileBrokerKeepAlive = cms.untracked.bool( True ),
    fileBrokerPort = cms.untracked.string( "8080" ),
    fileBrokerUseLocalLock = cms.untracked.bool( True ),
    fuLockPollInterval = cms.untracked.uint32( 2000 ),
    requireTransfersPSet = cms.untracked.bool( False ),
    selectedTransferMode = cms.untracked.string( "" ),
    mergingPset = cms.untracked.string( "" ),
    outputAdler32Recheck = cms.untracked.bool( False ),
)

# write the collected FED RAW data in the DAQ .raw file format
process.writer = cms.OutputModule("RawStreamFileWriterForBU",
    source = cms.InputTag('rawDataCollector'),
    numEventsPerFile = cms.uint32(0) # to be overwritten after parsing the command line options
)

process.endpath = cms.EndPath(process.writer)

process.load('FWCore.MessageService.MessageLogger_cfi')
process.MessageLogger.cerr.FwkReport.reportEvery = 0 # to be overwritten after parsing the command line options
|
||
# parse command line options
options = VarParsing.VarParsing ('python')

# remove the VarParsing built-in options that do not apply to this job,
# scrubbing them from VarParsing's internal bookkeeping structures
for name in ('filePrepend', 'maxEvents', 'outputFile', 'secondaryOutputFile',
             'section', 'tag', 'storePrepend', 'totalSections'):
    # these registries hold an entry for every registered option
    for registry in (options._register, options._beenSet, options._info, options._types):
        del registry[name]
    # these only hold an entry depending on the option's multiplicity/flags
    for registry in (options._singletons, options._lists,
                     options._noCommaSplit, options._noDefaultClear):
        if name in registry:
            del registry[name]
|
||
|
||
# register the options understood by this configuration, then parse the
# command line; all options are singletons
_singleton = VarParsing.VarParsing.multiplicity.singleton
_int = VarParsing.VarParsing.varType.int
_string = VarParsing.VarParsing.varType.string

for _name, _default, _type, _help in (
        ('runNumber',     0,           _int,    "Run number to use"),
        ('lumiNumber',    None,        _int,    "Luminosity section number to use"),
        ('eventsPerLumi', 11650,       _int,    "Number of events in the given luminosity section to process"),
        ('eventsPerFile', 50,          _int,    "Split the output into files with at most this number of events"),
        ('outputPath',    os.getcwd(), _string, "Output directory for the FED RAW data files")):
    options.register(_name, _default, _singleton, _type, _help)

options.parseArguments()
|
||
# check that the option values are valid; stop at the first invalid one
for invalid, message in (
        (options.runNumber == 0,     'Invalid run number'),
        (options.lumiNumber == 0,    'Invalid luminosity section number'),
        (options.eventsPerLumi == 0, 'Invalid number of events per luminosity section'),
        (options.eventsPerFile == 0, 'Invalid number of events per output file')):
    if invalid:
        sys.stderr.write(message + '\n')
        sys.exit(1)
|
||
# configure the job based on the command line options
process.source.fileNames = options.inputFiles
if options.lumiNumber is not None:
    # process only one lumisection, and cap the number of events read from it
    process.source.lumisToProcess = cms.untracked.VLuminosityBlockRange('%d:%d' % (options.runNumber, options.lumiNumber))
    process.maxEvents.input = options.eventsPerLumi
process.EvFDaqDirector.runNumber = options.runNumber
process.EvFDaqDirector.baseDir = options.outputPath
process.EvFDaqDirector.buBaseDir = options.outputPath
process.writer.numEventsPerFile = options.eventsPerFile
# log one report per output file
process.MessageLogger.cerr.FwkReport.reportEvery = options.eventsPerFile

# create the output directory, if it does not exist,
# along with the per-run subdirectory used by the EvFDaqDirector service
os.makedirs(options.outputPath, exist_ok=True)
os.makedirs('%s/run%06d' % (options.outputPath, options.runNumber), exist_ok=True)
# create an empty fu.lock file — presumably expected by EvFDaqDirector
# (cf. its fuLockPollInterval parameter); TODO confirm
open('%s/run%06d/fu.lock' % (options.outputPath, options.runNumber), 'w').close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
#! /usr/bin/env python3 | ||
|
||
import argparse | ||
import glob | ||
import json | ||
import os, os.path | ||
import re | ||
import shutil | ||
import socket | ||
import subprocess | ||
import sys | ||
|
||
def cmsRun(config, **args):
    """Run ``cmsRun config arg=val ...``, echoing the command being executed.

    On failure, print whether cmsRun exited with an error code or was killed
    by a signal, then terminate this script with the same status.
    """
    cmd = [ 'cmsRun', config ] + [ arg + '=' + str(val) for (arg, val) in args.items() ]
    sys.stdout.write(' \\\n '.join(cmd))
    sys.stdout.write('\n\n')
    status = subprocess.run(cmd, stdout=None, stderr=None)

    # handle error conditions explicitly instead of calling
    # status.check_returncode(): check_returncode() raises CalledProcessError
    # on any nonzero status, which previously made these branches unreachable
    if status.returncode < 0:
        sys.stderr.write('error: cmsRun was killed by signal %d\n' % -status.returncode)
        sys.exit(status.returncode)
    elif status.returncode > 0:
        sys.stderr.write('error: cmsRun exited with error code %d\n' % status.returncode)
        sys.exit(status.returncode)
|
||
|
||
# default values
events_per_file = 50
events_per_lumi = 11650
output_directory = ''

# command line interface
parser = argparse.ArgumentParser(
    description='Convert RAW data from .root format to .raw format.',
    formatter_class = argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('files', type=str, metavar='FILES', nargs='+',
    help='input files in .root format')
parser.add_argument('-o', '--output', type=str, dest='output_directory', metavar='PATH', default='',
    help='base path to store the output files; subdirectories based on the run number are automatically created')
parser.add_argument('-f', '--events_per_file', type=int, dest='events_per_file', metavar='EVENTS', default=events_per_file,
    help='split the output into files with at most EVENTS events')
parser.add_argument('-l', '--events_per_lumi', type=int, dest='events_per_lumi', metavar='EVENTS', default=events_per_lumi,
    help='process at most EVENTS events in each lumisection')
parser.add_argument('--one-file-per-lumi', action='store_true', dest='one_file_per_lumi', default=False,
    help='assume that lumisections are not split across files (and disable --events_per_lumi)')

# parse the command line arguments and options
args = parser.parse_args()

# drop a single trailing slash from the output directory, if present
if args.output_directory.endswith('/') and args.output_directory:
    args.output_directory = args.output_directory[:-1]

# normalise the input file names: plain local paths (no scheme prefix,
# not LFNs under /store/, and actually present on disk) get a 'file:' prefix
files = [ f if (':' in f or f.startswith('/store/') or not os.path.exists(f)) else 'file:' + f for f in args.files ]
|
||
# extract the list of runs and lumisections in the input files

class FileInfo(object):
    """Accumulator for one (run, lumisection): event count plus the set of
    input files contributing to it."""
    def __init__(self):
        self.events = 0       # total number of events in this lumisection
        self.files = set()    # input files containing (part of) this lumisection

# header of the per-lumisection table printed by `edmFileUtil --eventsInLumis`
header = re.compile(r'^ +Run +Lumi +# Events$')
# a blank line marks the end of that table
empty = re.compile(r'^ *$')
# content[run][lumi] -> FileInfo
content = {}
|
||
for f in files:

    # collect the per-lumisection event counts with `edmFileUtil --eventsInLumis`
    output = subprocess.run(['edmFileUtil', '--eventsInLumis', f], capture_output=True, text=True)
    if output.returncode != 0:
        if output.returncode < 0:
            sys.stderr.write('error: edmFileUtil was killed by signal %d\n' % -output.returncode)
        else:
            sys.stderr.write('error: edmFileUtil exited with error code %d\n' % output.returncode)
        sys.stderr.write('\n')
        sys.stderr.write(output.stderr)
        sys.exit(output.returncode)

    # scan the tool's output for the runs/lumisections/events table:
    # it starts after the 'Run Lumi # Events' header and ends at the
    # first blank line
    parsing = False
    for line in output.stdout.splitlines():
        if not parsing:
            if header.match(line):
                # header found: the next lines are table rows
                parsing = True
            continue
        if empty.match(line):
            # blank line: the table is over
            parsing = False
            continue
        run, lumi, events = map(int, line.split())
        info = content.setdefault(run, {}).setdefault(lumi, FileInfo())
        info.events += events
        info.files.add(f)
|
||
# drop empty lumisections
for run_number, lumis in content.items():
    for lumi_number in [ls for ls, info in lumis.items() if info.events == 0]:
        del lumis[lumi_number]

# drop runs left without any lumisections
for run_number in [r for r, lumis in content.items() if not lumis]:
    del content[run_number]
|
||
# locate the CMSSW configuration file driven by this script,
# preferring the user's working area over the release area
config_name = 'HLTrigger/Tools/python/convertToRaw.py'
current_area = os.environ['CMSSW_BASE']
release_area = os.environ['CMSSW_RELEASE_BASE']

for area in (current_area, release_area):
    config_py = area + '/src/' + config_name
    if os.path.exists(config_py):
        break
else:
    sys.stderr.write('error: cannot find the configuration file %s\n' % config_name)
    sys.exit(1)
|
||
# convert the input data to FED RAW data format

# print a short summary of what was found in the input files
for run_number in sorted(content):
    for lumi_number in sorted(content[run_number]):
        info = content[run_number][lumi_number]
        print("events: %d" % info.events)
        print("file: %s" % ', '.join(info.files))
|
||
# convert each run: either in a single cmsRun job (--one-file-per-lumi), or
# one cmsRun job per lumisection followed by a merge of the per-lumi output
for run in sorted(content):

    # create the output directory structure, wiping any previous content
    run_path = args.output_directory + f'/run{run:06d}'
    shutil.rmtree(run_path, ignore_errors=True)
    os.makedirs(run_path)

    if args.one_file_per_lumi:
        # process the whole run with a single job
        lumis = sorted(content[run])
        print('found run %d, lumis %d-%d, with %d events' % (run, min(lumis), max(lumis), sum(content[run][lumi].events for lumi in lumis)))
        cmsRun(config_py, inputFiles = ','.join(files), runNumber = run, eventsPerFile = args.events_per_file, outputPath = args.output_directory)

    else:
        # process lumisections individually, then merge the output;
        # `summary` accumulates the run-level EoR (End of Run) bookkeeping
        summary = {
            'data': [0, 0, 0, 0], # [ 'events', 'files', 'lumisections', 'last lumisection' ]
            'definition': run_path + '/jsd/EoR.jsd',
            'source': socket.getfqdn() + '_' + str(os.getpid())
        }

        for lumi in sorted(content[run]):

            # process one lumisection in its own working directory
            print('found run %d, lumi %d, with %d events' % (run, lumi, content[run][lumi].events))
            lumi_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}'
            shutil.rmtree(lumi_path, ignore_errors=True)
            os.makedirs(lumi_path)
            cmsRun(config_py, inputFiles = ','.join(content[run][lumi].files), runNumber = run, lumiNumber = lumi, eventsPerLumi = args.events_per_lumi, eventsPerFile = args.events_per_file, outputPath = lumi_path)

            # merge this lumisection's data into the per-run directory

            # number of events expected to be processed:
            # a negative --events_per_lumi means "no limit"
            if args.events_per_lumi < 0:
                expected_events = content[run][lumi].events
            else:
                expected_events = min(args.events_per_lumi, content[run][lumi].events)

            # number of files expected to be created (ceiling division)
            expected_files = (expected_events + args.events_per_file - 1) // args.events_per_file

            # find the files produced by the conversion job and move them to the per-run path
            lumi_base_path = args.output_directory + f'/run{run:06d}_ls{lumi:04d}'
            lumi_path = lumi_base_path + f'/run{run:06d}'

            # jsd files: keep only the first copy, the others are identical
            # (NOTE(review): presumably every job writes the same jsd files — confirm)
            jsd_path = lumi_path + '/jsd'
            if not os.path.exists(run_path + '/jsd'):
                shutil.move(jsd_path, run_path)
            else:
                shutil.rmtree(jsd_path)

            # lumisection data and EoLS (End of Lumisection) files
            lumi_files = glob.glob(lumi_path + f'/run{run:06d}_ls{lumi:04d}_*')
            for f in lumi_files:
                shutil.move(f, run_path + '/')

            # read the partial EoR file written by this job and check that it
            # accounts for exactly the expected events, files and lumisection
            eor_file = lumi_path + f'/run{run:06d}_ls0000_EoR.jsn'
            with open(eor_file) as f:
                eor = json.load(f)
                produced_events = int(eor['data'][0])
                produced_files = int(eor['data'][1])
                produced_lumis = int(eor['data'][2])
                produced_last_lumi = int(eor['data'][3])
                assert produced_events == expected_events
                assert produced_files == expected_files
                assert produced_lumis == 1
                assert produced_last_lumi == lumi
                # fold this lumisection into the run-level summary
                summary['data'][0] += expected_events
                summary['data'][1] += expected_files
                summary['data'][2] += 1
                summary['data'][3] = lumi
            os.remove(eor_file)

            # remove the intermediate per-lumisection directory
            shutil.rmtree(lumi_base_path, ignore_errors=True)

        # write the final EoR file
        # implemented by hand instead of using json.dump() to match the style used by the DAQ tools
        eor_file = run_path + f'/run{run:06d}_ls0000_EoR.jsn'
        f = open(eor_file, 'w')
        f.write('{\n "data" : [ "%d", "%d", "%d", "%d" ],\n "definition" : "%s",\n "source" : "%s"\n}\n' % (summary['data'][0], summary['data'][1], summary['data'][2], summary['data'][3], summary['definition'], summary['source']))
        f.close()