Skip to content

Commit

Permalink
CPV: mute raw decoder error reporting for 10 minutes if it reports mo…
Browse files Browse the repository at this point in the history
…re than 10 errors per minute
  • Loading branch information
sevdokim authored and davidrohr committed Apr 22, 2022
1 parent e537af1 commit b6c90b3
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ namespace o2
namespace cpv
{

class RawDecoderError
{
public:
struct RawDecoderError {
RawDecoderError() = default; //Constructors for vector::emplace_back methods
RawDecoderError(short c, short d, short g, short p, RawErrorType_t e) : ccId(c), dil(d), gas(g), pad(p), errortype(e) {}
RawDecoderError(const RawDecoderError& e) = default;
Expand All @@ -37,7 +35,6 @@ class RawDecoderError
short gas;
short pad;
RawErrorType_t errortype;
ClassDefNV(RawDecoderError, 1);
};

union AddressCharge {
Expand Down Expand Up @@ -99,6 +96,9 @@ class RawDecoder
/// \return Reference to the list of decoding errors
const std::vector<o2::cpv::RawDecoderError>& getErrors() const { return mErrors; }

/// \brief mute error reporting
void muteErrors() { mIsMuteErrors = true; }

protected:
/// \brief Read channels for the current event in the raw buffer
RawErrorType_t readChannels();
Expand All @@ -111,9 +111,10 @@ class RawDecoder
std::vector<uint32_t> mDigits; ///< vector of channels and BCs in the raw stream
std::vector<o2::cpv::BCRecord> mBCRecords; ///< vector of bc references to digits
std::vector<RawDecoderError> mErrors; ///< vector of decoding errors
bool mChannelsInitialized = false; ///< check whether the channels are initialized
bool mChannelsInitialized; ///< check whether the channels are initialized
bool mIsMuteErrors; ///< mute errors

ClassDefNV(RawDecoder, 2);
ClassDefNV(RawDecoder, 3);
};

} // namespace cpv
Expand Down
58 changes: 38 additions & 20 deletions Detectors/CPV/reconstruction/src/RawDecoder.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
using namespace o2::cpv;

RawDecoder::RawDecoder(RawReaderMemory& reader) : mRawReader(reader),
mChannelsInitialized(false)
mChannelsInitialized(false),
mIsMuteErrors(false)
{
}

RawErrorType_t RawDecoder::decode()
{

auto& rdh = mRawReader.getRawHeader();
short linkID = o2::raw::RDHUtils::getLinkID(rdh);
mDigits.clear();
Expand All @@ -42,6 +42,12 @@ RawErrorType_t RawDecoder::decode()
RawErrorType_t RawDecoder::readChannels()
{
mChannelsInitialized = false;
// // test error
// if (!mIsMuteErrors) {
// LOG(error) << "RawDecoder::readChannels() : "
// << "test error";
// }
// mErrors.emplace_back(-1, 0, 0, 0, kOK); //5 is non-existing link with general errors

auto& payloadWords = mRawReader.getPayload();
uint32_t wordCountFromLastHeader = 1; //header word is included
Expand All @@ -59,20 +65,24 @@ RawErrorType_t RawDecoder::readChannels()
<< "I read cpv header for orbit = " << header.orbit()
<< " and BC = " << header.bc();
if (!isHeaderExpected) { //actually, header was not expected
LOG(error) << "RawDecoder::readChannels() : "
<< "header was not expected";
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "header was not expected";
}
removeLastNDigits(nDigitsAddedFromLastHeader); //remove previously added digits as they are bad
mErrors.emplace_back(5, 0, 0, 0, kNO_CPVTRAILER);
mErrors.emplace_back(-1, 0, 0, 0, kNO_CPVTRAILER);
}
skipUntilNextHeader = false;
currentBC = header.bc();
wordCountFromLastHeader = 0;
nDigitsAddedFromLastHeader = 0;
if (currentOrbit != header.orbit()) { //bad cpvheader
LOG(error) << "RawDecoder::readChannels() : "
<< "currentOrbit(=" << currentOrbit
<< ") != header.orbit()(=" << header.orbit() << ")";
mErrors.emplace_back(5, 0, 0, 0, kCPVHEADER_INVALID); //5 is non-existing link with general errors
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "currentOrbit(=" << currentOrbit
<< ") != header.orbit()(=" << header.orbit() << ")";
}
mErrors.emplace_back(-1, 0, 0, 0, kCPVHEADER_INVALID); //-1 is the non-existing ccId used for general errors
skipUntilNextHeader = true;
}
} else {
Expand All @@ -89,8 +99,10 @@ RawErrorType_t RawDecoder::readChannels()
if (addDigit(pw.mDataWord, word.ccId(), currentBC)) {
nDigitsAddedFromLastHeader++;
} else {
LOG(debug) << "RawDecoder::readChannels() : "
<< "read pad word with non-valid pad address";
if (!mIsMuteErrors) {
LOG(debug) << "RawDecoder::readChannels() : "
<< "read pad word with non-valid pad address";
}
unsigned int dil = pw.dil, gas = pw.gas, address = pw.address;
mErrors.emplace_back(word.ccId(), dil, gas, address, kPadAddress);
}
Expand All @@ -103,28 +115,34 @@ RawErrorType_t RawDecoder::readChannels()
if (diffInCount > 1 ||
diffInCount < -1) {
//some words lost?
LOG(error) << "RawDecoder::readChannels() : "
<< "Read " << wordCountFromLastHeader << " words, expected " << trailer.wordCounter();
mErrors.emplace_back(5, 0, 0, 0, kCPVTRAILER_INVALID);
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "Read " << wordCountFromLastHeader << " words, expected " << trailer.wordCounter();
}
mErrors.emplace_back(-1, 0, 0, 0, kCPVTRAILER_INVALID);
//throw all previous data and go to next header
removeLastNDigits(nDigitsAddedFromLastHeader);
skipUntilNextHeader = true;
}
if (trailer.bc() != currentBC) {
//trailer does not fit header
LOG(error) << "RawDecoder::readChannels() : "
<< "CPVHeader BC is " << currentBC << " but CPVTrailer BC is " << trailer.bc();
mErrors.emplace_back(5, 0, 0, 0, kCPVTRAILER_INVALID);
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "CPVHeader BC(" << currentBC << ") != CPVTrailer BC(" << trailer.bc() << ")";
}
mErrors.emplace_back(-1, 0, 0, 0, kCPVTRAILER_INVALID);
removeLastNDigits(nDigitsAddedFromLastHeader);
skipUntilNextHeader = true;
}
isHeaderExpected = true;
} else {
wordCountFromLastHeader++;
//error
LOG(error) << "RawDecoder::readChannels() : "
<< "Read unknown word";
mErrors.emplace_back(5, 0, 0, 0, kUNKNOWN_WORD); //add error for non-existing row
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "Read unknown word";
}
mErrors.emplace_back(-1, 0, 0, 0, kUNKNOWN_WORD); //add error for non-existing row
//what to do?
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// or submit itself to any jurisdiction.

#include <vector>

#include <chrono>
#include "Framework/DataProcessorSpec.h"
#include "Framework/Task.h"
#include "Framework/ConcreteDataMatcher.h"
Expand Down Expand Up @@ -68,12 +68,18 @@ class RawToDigitConverterSpec : public framework::Task
char CheckHWAddress(short ddl, short hwAddress, short& fee);

private:
bool mIsUsingGainCalibration; ///< Use gain calibration from CCDB
bool mIsUsingBadMap; ///< Use BadChannelMap to mask bad channels
bool mIsPedestalData; ///< Do not subtract pedestals if true
std::vector<Digit> mOutputDigits; ///< Container with output cells
std::vector<TriggerRecord> mOutputTriggerRecords; ///< Container with output cells
std::vector<RawDecoderError> mOutputHWErrors; ///< Errors occured in reading data
bool mIsUsingGainCalibration; ///< Use gain calibration from CCDB
bool mIsUsingBadMap; ///< Use BadChannelMap to mask bad channels
bool mIsPedestalData; ///< Do not subtract pedestals if true
std::vector<Digit> mOutputDigits; ///< Container with output cells
std::vector<TriggerRecord> mOutputTriggerRecords; ///< Container with output trigger records
std::vector<RawDecoderError> mOutputHWErrors; ///< Errors occurred in reading data
bool mIsMuteDecoderErrors = false; ///< mute errors for 10 minutes
int mDecoderErrorsCounterWhenMuted = 0; ///< errors counter while errors are muted
int mDecoderErrorsPerMinute = 0; ///< errors per minute counter
int mMinutesPassed = 0; ///< runtime duration in minutes
std::chrono::time_point<std::chrono::system_clock> mStartTime; ///< Time of start of decoding
std::chrono::time_point<std::chrono::system_clock> mTimeWhenMuted; ///< Time when muted errors
};

/// \brief Creating DataProcessorSpec for the CPV Digit Converter Spec
Expand Down
58 changes: 52 additions & 6 deletions Detectors/CPV/workflow/src/RawToDigitConverterSpec.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ using Lifetime = o2::framework::Lifetime;

void RawToDigitConverterSpec::init(framework::InitContext& ctx)
{
mStartTime = std::chrono::system_clock::now();
mDecoderErrorsPerMinute = 0;
mIsMuteDecoderErrors = false;

LOG(debug) << "Initializing RawToDigitConverterSpec...";
// Pedestal flag true/false
LOG(info) << "Pedestal run: " << (mIsPedestalData ? "YES" : "NO");
Expand All @@ -56,6 +60,23 @@ void RawToDigitConverterSpec::init(framework::InitContext& ctx)

void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
{
// check timers if we need mute/unmute error reporting
auto now = std::chrono::system_clock::now();
if (mIsMuteDecoderErrors) { // check if 10-minutes muting period passed
if (((now - mTimeWhenMuted) / std::chrono::minutes(1)) >= 10) {
mIsMuteDecoderErrors = false; //unmute
if (mDecoderErrorsCounterWhenMuted) {
LOG(error) << "RawToDigitConverterSpec::run() : " << mDecoderErrorsCounterWhenMuted << " errors happened while it was muted ((";
}
mDecoderErrorsCounterWhenMuted = 0;
}
}
if (((now - mStartTime) / std::chrono::minutes(1)) > mMinutesPassed) {
mMinutesPassed = (now - mStartTime) / std::chrono::minutes(1);
LOG(debug) << "minutes passed: " << mMinutesPassed;
mDecoderErrorsPerMinute = 0;
}

// Cache digits from bunch crossings as the component reads timeframes from many links consecutively
std::map<o2::InteractionRecord, std::shared_ptr<std::vector<o2::cpv::Digit>>> digitBuffer; // Internal digit buffer
int firstEntry = 0;
Expand Down Expand Up @@ -120,11 +141,13 @@ void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
try {
rawreader.next();
} catch (RawErrorType_t e) {
LOG(error) << "Raw decoding error " << (int)e;
if (!mIsMuteDecoderErrors) {
LOG(error) << "Raw decoding error " << (int)e;
}
//add error list
//RawErrorType_t is defined in O2/Detectors/CPV/reconstruction/include/CPVReconstruction/RawReaderMemory.h
//RawDecoderError(short c, short d, short g, short p, RawErrorType_t e)
mOutputHWErrors.emplace_back(25, 0, 0, 0, e); //Put general errors to non-existing ccId 25
mOutputHWErrors.emplace_back(-1, 0, 0, 0, e); //Put general errors to non-existing ccId -1
//if problem in header, abandon this page
if (e == RawErrorType_t::kRDH_DECODING) {
LOG(error) << "RDH decoding error. Skipping this TF";
Expand All @@ -139,17 +162,40 @@ void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
auto mod = o2::raw::RDHUtils::getLinkID(rdh) + 2; //link=0,1,2 -> mod=2,3,4
//for now all modules are written to one LinkID
if (mod > o2::cpv::Geometry::kNMod || mod < 2) { //only 3 correct modules:2,3,4
LOG(error) << "module=" << mod << "do not exist";
mOutputHWErrors.emplace_back(25, mod, 0, 0, kRDH_INVALID); //Add non-existing modules to non-existing ccId 25 and dilogic = mod
continue; //skip STU mod
if (!mIsMuteDecoderErrors) {
LOG(error) << "RDH linkId corresponds to module " << mod << " which does not exist";
}
mOutputHWErrors.emplace_back(-1, mod, 0, 0, kRDH_INVALID); //Add non-existing modules to non-existing ccId -1 and dilogic = mod
continue;
}
o2::cpv::RawDecoder decoder(rawreader);
if (mIsMuteDecoderErrors) {
decoder.muteErrors();
}
RawErrorType_t err = decoder.decode();
int decoderErrors = 0;
for (auto errs : decoder.getErrors()) {
if (errs.ccId == -1) { // error related to wrong data format
decoderErrors++;
}
}
mDecoderErrorsPerMinute += decoderErrors;
// LOG(debug) << "RawDecoder found " << decoderErrors << " raw format errors";
// LOG(debug) << "Now I have " << mDecoderErrorsPerMinute << " errors for current minute";
if (mIsMuteDecoderErrors) {
mDecoderErrorsCounterWhenMuted += decoder.getErrors().size();
} else {
if (mDecoderErrorsPerMinute > 10) { // mute error reporting for 10 minutes
LOG(warning) << "> 10 raw decoder error messages per minute, muting it for 10 minutes";
mIsMuteDecoderErrors = true;
mTimeWhenMuted = std::chrono::system_clock::now();
}
}

if (!(err == kOK || err == kOK_NO_PAYLOAD)) {
//TODO handle severe errors
//TODO: probably careful conversion of decoder errors to Fitter errors?
mOutputHWErrors.emplace_back(25, mod, 0, 0, err); //assign general RDH errors to non-existing ccId 25 and dilogic = mod
mOutputHWErrors.emplace_back(-1, mod, 0, 0, err); //assign general RDH errors to non-existing ccId -1 and dilogic = mod
}

std::shared_ptr<std::vector<o2::cpv::Digit>> currentDigitContainer;
Expand Down

0 comments on commit b6c90b3

Please sign in to comment.