Skip to content

Commit

Permalink
syslogprocess sysline out of order tracking (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
jtmoon79 committed May 28, 2024
1 parent 7b2c8a7 commit d7f9bcc
Show file tree
Hide file tree
Showing 555 changed files with 2,001 additions and 1,541 deletions.
41 changes: 41 additions & 0 deletions logs/other/tests/dtf11-A-outoforder.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
2000-01-01 00:00:00 [dtf11-A]a
2000-02-01 00:00:00 [dtf11-A]ab
2000-03-01 00:00:00 [dtf11-A]abc
2000-04-01 00:00:00 [dtf11-A]abcd
2000-05-01 00:00:00 [dtf11-A]abcde
2000-06-01 00:00:00 [dtf11-A]abcdef
2000-07-01 00:00:00 [dtf11-A]abcdefg
2000-09-01 00:00:00 [dtf11-A]abcdefgh
2000-08-01 00:00:00 [dtf11-A]abcdefghi out of order! 1
2000-10-01 00:00:00 [dtf11-A]abcdefghij
2000-11-01 00:00:00 [dtf11-A]abcdefghijk
2000-12-02 00:00:00 [dtf11-A]abcdefghijkl
2000-12-03 00:00:00 [dtf11-A]abcdefghijkl
2000-12-04 00:00:00 [dtf11-A]abcdefghijkl
2000-12-05 00:00:00 [dtf11-A]abcdefghijkl
2000-12-06 00:00:00 [dtf11-A]abcdefghijkl
2000-12-07 00:00:00 [dtf11-A]abcdefghijkl
2000-12-08 00:00:00 [dtf11-A]abcdefghijkl
2000-12-09 00:00:00 [dtf11-A]abcdefghijkl
2000-12-10 00:00:00 [dtf11-A]abcdefghijkl
2000-12-11 00:00:00 [dtf11-A]abcdefghijkl
2000-12-12 00:00:00 [dtf11-A]abcdefghijkl
2000-12-13 00:00:00 [dtf11-A]abcdefghijkl
2000-12-14 00:00:00 [dtf11-A]abcdefghijkl
2000-12-15 00:00:00 [dtf11-A]abcdefghijkl
2000-12-16 00:00:00 [dtf11-A]abcdefghijkl
2000-12-17 00:00:00 [dtf11-A]abcdefghijkl
2000-12-18 00:00:00 [dtf11-A]abcdefghijkl
2000-12-19 00:00:00 [dtf11-A]abcdefghijkl
2000-12-20 00:00:00 [dtf11-A]abcdefghijkl
2000-12-21 00:00:00 [dtf11-A]abcdefghijkl
2000-12-22 00:00:00 [dtf11-A]abcdefghijkl
2000-12-23 00:00:00 [dtf11-A]abcdefghijkl
2000-12-24 00:00:00 [dtf11-A]abcdefghijkl
2000-12-27 00:00:00 [dtf11-A]abcdefghijkl
2000-12-26 00:00:00 [dtf11-A]abcdefghijkl out of order!! 2
2000-12-25 00:00:00 [dtf11-A]abcdefghijkl out of order!!! 3
2000-12-28 00:00:00 [dtf11-A]abcdefghijkl
2000-12-29 00:00:00 [dtf11-A]abcdefghijkl
2000-12-30 00:00:00 [dtf11-A]abcdefghijkl
2000-12-31 00:00:00 [dtf11-A]abcdefghijkl
5 changes: 5 additions & 0 deletions logs/other/tests/dtf7-5-LEVELS-outoforder.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[ERROR] 2000-01-01 00:00:01 -0400 [dtf7-5-LEVELS] 1
[WARNING] 2000-01-01 00:00:02 -0400 [dtf7-5-LEVELS] 22
[INFO] 2000-01-01 00:00:04 -0400 [dtf7-5-LEVELS] 333 out of order
[VERBOSE] 2000-01-01 00:00:03 -0400 [dtf7-5-LEVELS] 4444 out of order
[TRACE] 2000-01-01 00:00:05 -0400 [dtf7-5-LEVELS] 55555
3 changes: 3 additions & 0 deletions src/bin/s4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1762,6 +1762,8 @@ fn exec_syslogprocessor(
let mut sent_is_last: bool = false; // sanity check sending of `is_last`
let mut fo1: FileOffset = 0;
let search_more: bool;
// this is the first search for a sysline within the datetime filters
// it uses a binary find strategy
let result: ResultS3SyslineFind = syslogproc.find_sysline_between_datetime_filters(0);
match result {
ResultS3SyslineFind::Found((fo, syslinep)) => {
Expand Down Expand Up @@ -1820,6 +1822,7 @@ fn exec_syslogprocessor(
// start stage 3 - find all proceeding syslines acceptable to the passed filters
syslogproc.process_stage3_stream_syslines();
// the majority of sysline processing for this file occurs in this loop
// the remaining `find_` are linear strategy searches
let mut syslinep_last_opt: Option<SyslineP> = None;
loop {
// TODO: [2022/06/20] see note about refactoring `find` functions so
Expand Down
17 changes: 17 additions & 0 deletions src/printer/summary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1095,6 +1095,23 @@ fn print_summary_opt_processed(
"{}syslines high : {}",
indent2, summarysyslinereader.syslinereader_syslines_stored_highest
);
eprint!(
"{}out of order : ",
indent2,
);
if summarysyslinereader.syslinereader_out_of_order == 0 {
eprintln!("{}", summarysyslinereader.syslinereader_out_of_order);
} else {
let data = format!("{}", summarysyslinereader.syslinereader_out_of_order);
match print_colored_stderr(
COLOR_ERROR,
Some(*color_choice),
data.as_bytes(),
) {
Ok(_) => eprintln!(),
Err(e) => e_err!("print_colored_stderr: {:?}", e)
}
}
}
SummaryReaderData::FixedStruct((
_summaryblockreader,
Expand Down
75 changes: 62 additions & 13 deletions src/readers/syslinereader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ pub type ResultParseDateTime = Result<FindDateTimeData>;
#[cfg(test)]
pub type SetDroppedSyslines = HashSet<FileOffset>;

/// Current search strategy for a `SyslineReader` for finding a syslog line
/// at or after a given datetime.
#[derive(Debug, Eq, PartialEq)]
pub enum FindStrategy {
/// search from the beginning of the file to the end.
Linear,
/// search from the beginning of the file and jumping to different
/// locations in the file in a _binary search_ manner.
Binary,
}

// -------------
// SyslineReader

Expand Down Expand Up @@ -197,7 +208,6 @@ pub struct SyslineReader {
/// Intended for `--summary`.
///
/// [`DateTimeL`]: crate::data::datetime::DateTimeL
// TODO: [2022/07/27] cost-savings: save the ref
// TODO: [2023/03/22] change behavior to be "first printed" instead of "first processed"
pub(super) dt_first: DateTimeLOpt,
pub(super) dt_first_prev: DateTimeLOpt,
Expand All @@ -207,10 +217,15 @@ pub struct SyslineReader {
/// Intended for `--summary`.
///
/// [`DateTimeL`]: crate::data::datetime::DateTimeL
// TODO: [2022/07/27] cost-savings: save the ref
// TODO: [2023/03/22] change behavior to be "last printed" instead of "last processed"
pub(super) dt_last: DateTimeLOpt,
pub(super) dt_last_prev: DateTimeLOpt,
/// Count of log messages found to be out of order.
out_of_order: Count,
/// Interior tracking of current search strategy.
///
/// Should only be set in stage-processing functions `process_*`.
pub(crate) find_strategy: FindStrategy,
/// `Count`s found patterns stored in `dt_patterns`.
/// "mirrors" the global [`DATETIME_PARSE_DATAS`].
/// Keys are indexes into `DATETIME_PARSE_DATAS`,
Expand Down Expand Up @@ -400,6 +415,8 @@ pub struct SummarySyslineReader {
pub syslinereader_datetime_first: DateTimeLOpt,
/// datetime latest seen (not necessarily reflective of entire file)
pub syslinereader_datetime_last: DateTimeLOpt,
/// count of log messages out of order
pub syslinereader_out_of_order: Count,
/// `SyslineReader::find_sysline`
pub syslinereader_find_sysline_lru_cache_hit: Count,
/// `SyslineReader::find_sysline`
Expand Down Expand Up @@ -508,6 +525,8 @@ impl SyslineReader {
dt_first_prev: None,
dt_last: None,
dt_last_prev: None,
out_of_order: 0,
find_strategy: FindStrategy::Binary,
dt_patterns_counts,
dt_patterns_indexes,
tz_offset,
Expand Down Expand Up @@ -1380,15 +1399,33 @@ impl SyslineReader {

/// Update the two statistic `DateTimeL` of
/// `self.dt_first` and `self.dt_last`.
// TODO: [2024/04] track log messages not in chronological order, similar to
// tracking done in `FixedStructReader::entries_out_of_order` and
// `EvtxReader::out_of_order`
// Print the result in the `--summary` output.
fn dt_first_last_update(
&mut self,
datetime: &DateTimeL,
) {
defñ!("({:?})", datetime);
defn!("({:?})", datetime);

defo!("find_strategy {:?}", self.find_strategy);
match self.find_strategy {
// if `FindStrategy::Binary` then the owning `SyslogProocessor`
// is still in a binary search stage
FindStrategy::Binary => {
defx!("return early; (FindStrategy::Binary)");
return;
}
FindStrategy::Linear => {}
}

defo!(
"dt_first={:?}, dt_first_prev={:?}",
self.dt_first, self.dt_first_prev,
);
defo!(
"dt_last={:?}, dt_last_prev={:?}",
self.dt_last, self.dt_last_prev,
);
let mut is_out_of_order: bool = false;

// TODO: the `dt_first` and `dt_last` are only for `--summary`,
// no need to always copy datetimes.
// Would be good to only run this when `if self.do_summary {...}`
Expand All @@ -1397,23 +1434,35 @@ impl SyslineReader {
if &dt_first_ > datetime {
self.dt_first_prev = self.dt_first;
self.dt_first = Some(*datetime);
} else if self.dt_first_prev.is_some() {
self.out_of_order += 1;
is_out_of_order = true;
defo!("out_of_order={} (dt_first)", self.out_of_order);
}
}
None => {
self.dt_first = Some(*datetime);
}
}
defo!("dt_first={:?}", self.dt_first.unwrap());

match self.dt_last {
Some(dt_last_) => {
if &dt_last_ < datetime {
self.dt_last_prev = self.dt_last;
self.dt_last = Some(*datetime);
} else if !is_out_of_order {
self.out_of_order += 1;
defo!("out_of_order={} (dt_last)", self.out_of_order);
}
}
None => {
self.dt_last = Some(*datetime);
}
}
defo!("dt_last={:?}", self.dt_last.unwrap());

defx!();
}

/// current count of `DateTimeParseInstrs` that have been used as tracked
Expand Down Expand Up @@ -2239,9 +2288,7 @@ impl SyslineReader {
defn!("({}, {:?})", fileoffset, year_opt);

if self.fileoffset_last > fileoffset && self.is_streamed_file() {
// TODO: [2024/05] after Issue #283 is resolved then this should
// become a debug_panic
de_wrn!(
debug_panic!(
"fileoffset {} > {} fileoffset_last for a streamed file {:?}",
fileoffset, self.fileoffset_last, self.path(),
);
Expand Down Expand Up @@ -2948,7 +2995,7 @@ impl SyslineReader {
defn!("({}, {:?}, {:?})", fileoffset, dt_filter_after, dt_filter_before);

let result: ResultS3SyslineFind;
if self.is_streamed_file() {
if self.find_strategy == FindStrategy::Linear || self.is_streamed_file() {
result = self.find_sysline_at_datetime_filter_linear_search(fileoffset, dt_filter_after);
} else {
result = self.find_sysline_at_datetime_filter_binary_search(fileoffset, dt_filter_after);
Expand Down Expand Up @@ -3058,6 +3105,7 @@ impl SyslineReader {
let syslinereader_ezcheck12d2_hit_max = self.ezcheck12d2_hit_max;
let syslinereader_datetime_first = self.dt_first;
let syslinereader_datetime_last = self.dt_last;
let syslinereader_out_of_order = self.out_of_order;

SummarySyslineReader {
syslinereader_syslines,
Expand All @@ -3068,6 +3116,9 @@ impl SyslineReader {
syslinereader_syslines_by_range_miss,
syslinereader_syslines_by_range_put,
syslinereader_patterns,
syslinereader_datetime_first,
syslinereader_datetime_last,
syslinereader_out_of_order,
syslinereader_find_sysline_lru_cache_hit,
syslinereader_find_sysline_lru_cache_miss,
syslinereader_find_sysline_lru_cache_put,
Expand All @@ -3089,8 +3140,6 @@ impl SyslineReader {
syslinereader_ezcheck12d2_hit,
syslinereader_ezcheck12d2_miss,
syslinereader_ezcheck12d2_hit_max,
syslinereader_datetime_first,
syslinereader_datetime_last,
}
}
}
11 changes: 10 additions & 1 deletion src/readers/syslogprocessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ pub use crate::readers::linereader::ResultS3LineFind;
pub use crate::readers::syslinereader::{
DateTimePatternCounts,
ResultS3SyslineFind,
FindStrategy,
SummarySyslineReader,
SyslineReader,
};
Expand Down Expand Up @@ -937,13 +938,17 @@ impl SyslogProcessor {
{
defo!("!dt_pattern_has_year()");
let mtime: SystemTime = self.mtime();
self.syslinereader.find_strategy = FindStrategy::Linear;
defo!("find_strategy {:?}", self.syslinereader.find_strategy);
match self.process_missing_year(mtime, filter_dt_after_opt) {
FileProcessingResultBlockZero::FileOk => {}
result => {
defx!("Bad result {:?}", result);
return result;
}
}
self.syslinereader.find_strategy = FindStrategy::Binary;
defo!("find_strategy {:?}", self.syslinereader.find_strategy);
}

defx!();
Expand All @@ -960,9 +965,13 @@ impl SyslogProcessor {
/// [streaming]: ProcessingStage#variant.Stage3StreamSyslines
/// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
pub fn process_stage3_stream_syslines(&mut self) -> FileProcessingResultBlockZero {
defñ!();
defn!();
self.assert_stage(ProcessingStage::Stage2FindDt);
self.processingstage = ProcessingStage::Stage3StreamSyslines;
self.syslinereader.find_strategy = FindStrategy::Linear;
defo!("find_strategy {:?}", self.syslinereader.find_strategy);

defx!();

FileProcessingResultBlockZero::FileOk
}
Expand Down
20 changes: 14 additions & 6 deletions src/tests/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,15 +231,23 @@ lazy_static! {
"2000-01-02T03:04:05+00:00 VERBOSE: sysline #1\n2000-01-02T03:04:06+00:00 DEBUG: sysline #2\n";
pub static ref NTF_SYSLINE_2: NamedTempFile = create_temp_log(*NTF_SYSLINE_2_DATA);
pub static ref NTF_SYSLINE_2_PATH: FPath = ntf_fpath(&NTF_SYSLINE_2);
pub static ref NTF_SYSLINE_2_SYSLINE1_DT: DateTimeL = {
ymdhms(&*FO_0, 2000, 1, 2, 3, 4, 5)
};
pub static ref NTF_SYSLINE_2_SYSLINE2_DT: DateTimeL = {
ymdhms(&*FO_0, 2000, 1, 2, 3, 4, 6)
};
pub static ref NTF_SYSLINE_2_SYSLINE1_DT: DateTimeL = ymdhms(&*FO_0, 2000, 1, 2, 3, 4, 5);
pub static ref NTF_SYSLINE_2_SYSLINE2_DT: DateTimeL = ymdhms(&*FO_0, 2000, 1, 2, 3, 4, 6);
pub static ref NTF_SYSLINE_2_SZ: FileSz = {
(*NTF_SYSLINE_2_DATA).as_bytes().len() as FileSz
};

// 2 sysline file out of order

pub static ref NTF_SYSLINE_2_OOO_DATA: &'static str =
"2000-01-02T03:04:06+00:00 VERBOSE: sysline #1\n2000-01-02T03:04:05+00:00 DEBUG: sysline #2 out or order!\n";
pub static ref NTF_SYSLINE_2_OOO: NamedTempFile = create_temp_log(*NTF_SYSLINE_2_OOO_DATA);
pub static ref NTF_SYSLINE_2_OOO_PATH: FPath = ntf_fpath(&NTF_SYSLINE_2_OOO);
pub static ref NTF_SYSLINE_2_OOO_SYSLINE1_DT: DateTimeL = ymdhms(&*FO_0, 2000, 1, 2, 3, 4, 5);
pub static ref NTF_SYSLINE_2_OOO_SYSLINE2_DT: DateTimeL = ymdhms(&*FO_0, 2000, 1, 2, 3, 4, 6);
pub static ref NTF_SYSLINE_2_OOO_SZ: FileSz = {
(*NTF_SYSLINE_2_OOO_DATA).as_bytes().len() as FileSz
};
}

pub const NTF_LOG_EMPTY_FILETYPE: FileType = FILETYPE_UTF8;
Expand Down
Loading

0 comments on commit d7f9bcc

Please sign in to comment.