Skip to content

Commit

Permalink
Transmit recommendations updated
Browse files Browse the repository at this point in the history
Below is the list of changes made to the transmit part.
	1. As recommended, the maximum page size is now 1000.
	2. Removed next_url and jwt_token from the metadata. As suggested, page_size and page_number are now used in the metadata instead.
	3. Removed regex parsing.
	4. If page_number is greater than 1000, an error is raised.
  • Loading branch information
thangaraj-ramesh committed Apr 11, 2024
1 parent 1d2f3e9 commit 6129cd1
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 180 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def __init__(self, pattern: Pattern, data_model_mapper, options):
@staticmethod
def load_json(rel_path_of_file) -> dict:
""" Consumes a json file and returns a dictionary
:param rel_path_of_file: str
:return: dict """
:param rel_path_of_file: (str) json file path
:return: json: (dict) loaded json """
_json_path = path.dirname(path.realpath(__file__)) + "/" + rel_path_of_file
try:
if path.exists(_json_path):
Expand All @@ -55,11 +55,11 @@ def load_json(rel_path_of_file) -> dict:
def _format_set(self, values, mapped_field_type, expression, mapped_fields_array) -> str:
"""
Formats value in the event of set operation
:param values: list
:param mapped_field_type: str
:param expression: object
:param mapped_fields_array: object
:return formatted value
:param values: (list) list of values
:param mapped_field_type: (str) type of the field
:param expression: (object) ANTLR parsed expression object
:param mapped_fields_array: (list) list of mapped fields
:return formatted value: (string) formatted value for the IN operator
"""
gen = values.element_iterator()
formatted_list = []
Expand All @@ -83,17 +83,17 @@ def _format_set(self, values, mapped_field_type, expression, mapped_fields_array
def _format_value(value) -> str:
"""
Formats value in the event of equality, like, subset operation
:param value
:return formatted value
:param value: (str) input value
:return formatted value: (str) formatted value for other than IN operator
"""
return f'\"{value}\"'

@staticmethod
def _format_datetime(value) -> int:
"""
Converts timestamp to epoch
:param: value
:return: converted epoch value, int
:param value: (str) UTC timestamp
:return: converted_time: (int) epoch value
"""
try:
time_pattern = '%Y-%m-%dT%H:%M:%S.%fZ'
Expand All @@ -109,7 +109,9 @@ def _format_datetime(value) -> int:
def _parse_time_range(qualifier, time_range) -> list:
"""
Converts qualifier timestamp to epoch
return: list of converted epoch values
:param qualifier: (str) UTC timestamp
:param time_range: (int) time range in minutes
:return: converted_timestamp: (list) list of converted epoch values
"""
try:
compile_timestamp_regex = re.compile(START_STOP_PATTERN)
Expand All @@ -133,8 +135,8 @@ def _parse_time_range(qualifier, time_range) -> list:
def _get_mapped_field_type(self, mapped_field_array) -> str:
"""
Returns the type of mapped field array
:param mapped_field_array: list
:return: str
:param mapped_field_array: (list) list of mapped fields
:return: mapped_field_type: (str) type of the field
"""
mapped_field = mapped_field_array[0]
mapped_field_type = "string"
Expand All @@ -149,13 +151,14 @@ def _get_mapped_field_type(self, mapped_field_array) -> str:
def _check_value_comparator_support(self, value, comparator, mapped_field_type, mapped_fields_array,
expression) -> str:
"""
checks the comparator and value support
:param value
:param comparator
:param mapped_field_type: str
:param mapped_fields_array: list
:param expression: object
:return value: str
checks the comparator and value support.
raise the error for unsupported fields and operators.
:param value: (str) input value
:param comparator: (object) comparison operator
:param mapped_field_type: (str) type of field
:param mapped_fields_array: (list) list of mapped fields
:param expression: (object) ANTLR parsed expression object
:return value: (str) processed/formatted input value
"""
if mapped_field_type == "int":
if not str(value).isdigit():
Expand Down Expand Up @@ -202,8 +205,8 @@ def _check_value_comparator_support(self, value, comparator, mapped_field_type,
def _lookup_comparison_operator(self, expression_operator) -> str:
"""
lookup operators support in nozomi
:param expression_operator:object
:return str
:param expression_operator: (object) contains comparison operator
:return (str) comparator
"""
if str(expression_operator) not in self.comparator_lookup:
raise NotImplementedError(
Expand All @@ -214,10 +217,10 @@ def _lookup_comparison_operator(self, expression_operator) -> str:
def _eval_comparison_value(self, expression, mapped_field_type, mapped_fields_array) -> str:
"""
Function for parsing comparison expression value
:param expression: expression object
:param mapped_field_type: str
:param mapped_fields_array: list object
:return: formatted expression value
:param expression: (object) ANTLR parsed expression object
:param mapped_field_type: (str) type of field
:param mapped_fields_array: (list) list of mapped fields
:return value: (str) processed/formatted input value
"""
if expression.comparator == ComparisonComparators.In:
value = self._format_set(expression.value, mapped_field_type, expression, mapped_fields_array)
Expand All @@ -236,9 +239,9 @@ def _eval_comparison_value(self, expression, mapped_field_type, mapped_fields_ar
def _add_qualifier(self, query, qualifier) -> list:
"""
Convert the qualifier into epoch time and append in the query.
params: query : list
params: qualifier
return: query : list
params: query: (list) list of queries
params: qualifier: (str) start and stop UTC timestamp
return: query: (list) list of queries attached with timestamp
"""
query_qualifier = []
time_range = QueryStringPatternTranslator._parse_time_range(qualifier, self.options['time_range'])
Expand All @@ -253,9 +256,10 @@ def _handle_threat_name(formatted_value, comparator) -> str:
Handle threat name search
if threat_name = 'threat' means threat_name != ""
if threat_name == 'alert' means threat_name == ""
params: formatted_value : str
params: comparator : str
return: formatted_value , comparator
params: formatted_value: (str) input value
params: comparator: (str) comparison operator
return: formatted_value : (str) input value
comparator: (str) comparison operator
"""
reverse_comparator = {'==': '!=', '!=': '=='}
if formatted_value == '"threat"':
Expand All @@ -266,11 +270,11 @@ def _handle_threat_name(formatted_value, comparator) -> str:
def _parse_mapped_fields(self, formatted_value, mapped_fields_array, mapped_field_type, expression) -> list:
"""
parse mapped fields into boolean expression
:param formatted_value: str
:param mapped_fields_array: list
:param mapped_field_type:str
:param expression: expression object
:return: list
:param formatted_value: (str) input value
:param mapped_fields_array: (list) list of mapped fields
:param mapped_field_type: (str) type of field
:param expression: (object) ANTLR parsed expression object
:return: (list) formatted query
"""
comparator = self._lookup_comparison_operator(expression.comparator)

Expand Down Expand Up @@ -324,9 +328,11 @@ def _parse_mapped_fields(self, formatted_value, mapped_fields_array, mapped_fiel
def check_common_timestamp(query_01, query_02):
"""
Check whether both queries contain the same timestamp
:param query_01: str
:param query_02: str
:return query_01_without_timestamp str, query_02_without_timestamp str, timestamp str
:param query_01: (str) first query
:param query_02: (str) second query
:return query_01_without_timestamp: (str) first query without timestamp
query_02_without_timestamp: (str) second query without timestamp
timestamp: (str) common timestamp from query
"""
# Find the index where timestamp starts in the query string
query_01_timestamp_index = query_01.find('| where record_created_at>=')
Expand All @@ -338,19 +344,24 @@ def check_common_timestamp(query_01, query_02):
query_01_without_timestamp = query_01[:query_01_timestamp_index]
query_02_without_timestamp = query_02[:query_02_timestamp_index]
return query_01_without_timestamp, query_02_without_timestamp, timestamp
# If queries do not have a common timestamp, return None.
return query_01, query_02, None

@staticmethod
def _split_and_query_combine_by_or(query_01, query_02):
"""
Combine AND constructed query with OR operator
:param query_01: str
:param query_02: str
:return query_list: list
:param query_01: (str) first query
:param query_02: (str) second query
:return query_list: (list) first and second query combined by OR operator
"""
# split the query using the AND operator
query_01_split_query = query_01.split(' | ')
query_02_split_query = query_02.split(' | ')
query_list = []

# combine using the OR operator
# ex:(v1 AND v2) OR v3 -> (v1 or v3) AND (v2 or v3)
for q1 in query_01_split_query:
for q2 in query_02_split_query:
query_list.append(f'{q1.strip()} OR {q2.replace("where ", "").strip()}')
Expand All @@ -360,18 +371,22 @@ def _split_and_query_combine_by_or(query_01, query_02):
def _check_max_query_length(query_01, query_02, combined_query, query_list):
    """
    Add either the combined query or the two individual queries to query_list,
    depending on whether the combined query fits within MAX_QUERY_LENGTH.
    :param query_01: (str) first query
    :param query_02: (str) second query
    :param combined_query: (str) first and second query combined
    :param query_list: (list) processed queries collected so far (appended to in place)
    :return query_list: (list) the same list, including the newly added queries
    """
    # The combined query fits within the limit: keep it as a single query.
    if len(combined_query) <= MAX_QUERY_LENGTH:
        query_list.append(combined_query)
        return query_list

    # The combined query is too long: fall back to the individual queries,
    # skipping any that are already present in the processed list.
    for single_query in (query_01, query_02):
        if single_query not in query_list:
            query_list.append(single_query)
    return query_list

Expand All @@ -382,10 +397,10 @@ def combine_or_queries(self, expression_01, expression_02, operator='OR') -> lis
otherwise will treat each query as separate query.
ex: A , B are two queries. If A OR B is less than max query length returns A OR B
If A OR B is more than max query length returns [ A , B ]
:param expression_01: expression object
:param expression_02: expression object
:param operator: str
:return query list
:param expression_01: (list) first query
:param expression_02: (list) second query
:param operator: (str) operator
:return query: (list) list of combined queries
"""
query_list = []

Expand All @@ -395,11 +410,14 @@ def combine_or_queries(self, expression_01, expression_02, operator='OR') -> lis
for row_01 in expression_01:
combined_flag = False
for row_02 in expression_02:
# combine the query length is less than the max query length
if len(row_01) + len(row_02) < MAX_QUERY_LENGTH and not combined_flag:
combined_flag = True
# Queries have a timestamp.
if 'record_created_at' in row_01 and 'record_created_at' in row_02:
row_01_without_timestamp, row_02_without_timestamp, common_timestamp = \
self.check_common_timestamp(row_01, row_02)
# Queries have a common timestamp.
if common_timestamp:
split_query_list = self._split_and_query_combine_by_or(row_01_without_timestamp,
row_02_without_timestamp)
Expand All @@ -409,6 +427,7 @@ def combine_or_queries(self, expression_01, expression_02, operator='OR') -> lis
if row_01 not in query_list:
query_list.append(row_01)
query_list.append(row_02)
# Query contains the AND operator.
elif ' | ' in row_01 or ' | ' in row_02:
split_query_list = self._split_and_query_combine_by_or(row_01, row_02)
combined_query = ' | '.join(split_query_list)
Expand All @@ -427,16 +446,19 @@ def combine_or_queries(self, expression_01, expression_02, operator='OR') -> lis
def combine_and_queries(self, expression_01, expression_02, operator) -> list:
"""
combine the queries using and operator
:param expression_01: expression object
:param expression_02: expression object
:param operator: string
:return query list
:param expression_01: (list) first query
:param expression_02: (list) second query
:param operator: (str) operator
:return query: (list) list of combined queries
"""
query_list = []

for row_01 in expression_01:
for row_02 in expression_02:
# combine the query using the AND operator
combined_query = f'{row_01} {operator} {row_02}'
# combined query, if the query length exceeds the maximum query length,
# will treat each query as a separate query
if len(combined_query) > MAX_QUERY_LENGTH and not self.logged:
self.logged = True
logger.info('Unable to split the query. Query length is more than maximum length. '
Expand All @@ -447,7 +469,7 @@ def combine_and_queries(self, expression_01, expression_02, operator) -> list:
def _eval_combined_comparison_exp(self, expression) -> str:
"""
Function for parsing combined comparison expression
:param expression: expression object
:param expression: (object) ANTLR parsed expression object
"""
query = []
operator = self._lookup_comparison_operator(expression.operator)
Expand All @@ -466,8 +488,8 @@ def _eval_combined_comparison_exp(self, expression) -> str:
def _eval_combined_observation_exp(self, expression, qualifier=None) -> str:
"""
Function for parsing combined observation expression
:param expression: expression object
:param qualifier: qualifier
:param expression: (object) ANTLR parsed expression object
:param qualifier: (object) timestamp object
"""
expression_01 = self._parse_expression(expression.expr1, qualifier)
expression_02 = self._parse_expression(expression.expr2, qualifier)
Expand Down
Loading

0 comments on commit 6129cd1

Please sign in to comment.