From dd463427ea5110c1949a31f1a27640a883cd5b08 Mon Sep 17 00:00:00 2001 From: sasi Date: Mon, 22 Jul 2024 16:02:38 +0530 Subject: [PATCH 1/5] remove unclosed tags --- v2_utils.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/v2_utils.py b/v2_utils.py index 5e97cd5..ed9772d 100644 --- a/v2_utils.py +++ b/v2_utils.py @@ -27,28 +27,40 @@ def define_link_data(usernames): logging.info(f"{e}---define_link_data") return [] + def remove_unmatched_tags(text): try: - # Remove unmatched closing tags at the beginning of the string + # Remove unmatched closing tags at the beginning of the string text = re.sub(r'^\s*]+>\s*', '', text) - # Regex pattern to find matched or unmatched tags - pattern = re.compile(r'(<([^>]+)>.*?)|(<[^/][^>]*>.*)', re.DOTALL) + pattern = re.compile(r'(<([^>]+)>.*?)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL) matches = pattern.findall(text) - + cleaned_text = '' + open_tags = [] + for match in matches: if match[0]: # Full matched ... pairs cleaned_text += match[0] elif match[2]: # Unmatched opening tags + # Add the tag to the list of open tags + tag = re.match(r'<([^/][^>]*)>', match[2]) + if tag: + tag_name = tag.group(1).split()[0] + open_tags.append(tag_name) cleaned_text += match[2] - + + # Close any unmatched opening tags + while open_tags: + tag = open_tags.pop() + cleaned_text += f'' + return cleaned_text + except Exception as e: print(e) return text - def week_data_formatter(html_content, type): From ca8b54ade2650ce37559d681321fa755e20e675a Mon Sep 17 00:00:00 2001 From: sasi Date: Tue, 23 Jul 2024 16:21:09 +0530 Subject: [PATCH 2/5] testcases & markdown handler changes --- tests.py | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++ v2_utils.py | 46 +++++++++++++++++++- 2 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 tests.py diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..f9cf2e6 --- /dev/null +++ b/tests.py @@ -0,0 +1,122 @@ +import unittest +from v2_utils import remove_unmatched_tags +from app import app +import json,random + + +class CustomTestResult(unittest.TextTestResult): + def addSuccess(self, test): + super().addSuccess(test) + print(f"{test._testMethodName} - passed") + + +class CustomTestRunner(unittest.TextTestRunner): + resultclass = CustomTestResult + + +class TestRemoveUnmatchedTags(unittest.TestCase): + """ + Static test case input & output for check markdown handler function + """ + def test_remove_unmatched_tags_basic(self): + input_text = "
Test content

" + expected_output = "
Test content
" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_opening(self): + input_text = "
Test content" + expected_output = "
Test content
" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_closing(self): + input_text = "

Test content

" + expected_output = "

Test content

" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_nested_tags(self): + input_text = "

Test content

" + expected_output = "

Test content

" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_nested_opening(self): + input_text = "

Test content

" + expected_output = "

Test content

" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_unmatched_nested_closing(self): + input_text = "
Test content

" + expected_output = "
Test content
" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_multiple_unmatched_tags(self): + input_text = "
Test

Content

Here" + expected_output = "
Test

Content

Here" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_text_with_no_tags(self): + input_text = "Plain text with no tags" + expected_output = "Plain text with no tags" + self.assertEqual(remove_unmatched_tags(input_text), expected_output) + + def test_remove_unmatched_tags_empty_string(self): + input_text = "" + expected_output = "" + self.assertEqual(len(remove_unmatched_tags(input_text)),len(expected_output)) + + +class TestIssuesEndpoints(unittest.TestCase): + + def setUp(self): + self.app = app.test_client() + self.app.testing = True + self.issues_data = None # To store issues data for use in subsequent tests + + # Fetch issues data during setup + self._fetch_issues_data() + + def _fetch_issues_data(self): + # Validate the /issues endpoint and store the issues data + response = self.app.get('/issues') + self.assertEqual(response.status_code, 200) + + data = json.loads(response.data) + self.issues_data = data.get('issues', []) + self.assertTrue(len(self.issues_data) > 0, "No issues found in response") + + def test_get_issues_success(self): + # Check if issues data is correctly fetched + self.assertIsNotNone(self.issues_data, "Issues data is not populated") + + def test_get_issues_detail_success(self): + # Ensure the /issues endpoint was successfully called and issues data is available + if not self.issues_data: + self.skipTest("Skipping detail test as /issues endpoint did not return data") + + # Use first data from /issues response to form the endpoint URL + + index = random.randrange(1,len(self.issues_data)-1) + sample_issue = self.issues_data[index]['issues'][0] + issue_id = sample_issue['id'] + orgname = self.issues_data[index]['org_name'] + + endpoint = f'/v2/issues/{orgname}/{issue_id}' + + response = self.app.get(endpoint) + self.assertEqual(response.status_code, 200) + + def test_get_repo_detail_success(self): + # Ensure the /issues endpoint was successfully called and issues data is available + if not self.issues_data: + self.skipTest("Skipping detail test as /issues endpoint did not return data") + + # Use first data from /issues response to form the endpoint URL + index = random.randrange(1,len(self.issues_data)-1) + orgname = self.issues_data[index]['org_name'] + endpoint = f'/issues/{orgname}' + response = self.app.get(endpoint) + self.assertEqual(response.status_code, 200) + + + +if __name__ == '__main__': + unittest.main(testRunner=CustomTestRunner()) diff --git a/v2_utils.py b/v2_utils.py index ed9772d..4c1d89a 100644 --- a/v2_utils.py +++ b/v2_utils.py @@ -27,15 +27,53 @@ def define_link_data(usernames): logging.info(f"{e}---define_link_data") return [] +def preprocess_nested_tags(text): + try: + segments = re.split(r'(<[^>]+>)', text) + tag_stack = [] + corrected_segments = [] + + for segment in segments: + if re.match(r'<[^/][^>]*>', segment): # Opening tag + tag_stack.append(segment) + corrected_segments.append(segment) + elif re.match(r']+>', segment): # Closing tag + if tag_stack and tag_stack[-1][1:].split()[0] == segment[2:].split()[0]: + tag_stack.pop() + corrected_segments.append(segment) + else: + continue # Ignore unmatched closing tag + else: + corrected_segments.append(segment) + + while tag_stack: + open_tag = tag_stack.pop() + tag_name = re.match(r'<([^ ]+)', open_tag).group(1) + corrected_segments.append(f'') + + return ''.join(corrected_segments) + + except Exception as e: + print(e,"error in preprocess_nested_tags function") + return text + + def remove_unmatched_tags(text): try: + # Preprocess text to handle unmatched nested tags + text = preprocess_nested_tags(text) + # Remove unmatched closing tags at the beginning of the string text = re.sub(r'^\s*]+>\s*', '', text) # Regex pattern to find matched or unmatched tags pattern = re.compile(r'(<([^>]+)>.*?)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL) matches = pattern.findall(text) + #If get text without html tags + if matches == []: + return text + cleaned_text = '' open_tags = [] @@ -55,12 +93,18 @@ def remove_unmatched_tags(text): tag = open_tags.pop() cleaned_text += f'' + # Remove extra unmatched angle brackets + cleaned_text = re.sub(r'>+', '>', cleaned_text) + cleaned_text = re.sub(r'<+', '<', cleaned_text) + return cleaned_text except Exception as e: print(e) return text - + + + def week_data_formatter(html_content, type): From 56c26e71711659d813a4a9adf882f7c0be014ad0 Mon Sep 17 00:00:00 2001 From: sasi Date: Wed, 24 Jul 2024 16:40:10 +0530 Subject: [PATCH 3/5] added ul tags for corretly render in FE --- v2_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/v2_utils.py b/v2_utils.py index 4c1d89a..5537392 100644 --- a/v2_utils.py +++ b/v2_utils.py @@ -96,6 +96,10 @@ def remove_unmatched_tags(text): # Remove extra unmatched angle brackets cleaned_text = re.sub(r'>+', '>', cleaned_text) cleaned_text = re.sub(r'<+', '<', cleaned_text) + + #For front end renders add ul tags + if not cleaned_text.strip().startswith("
    "): + return f"
      {text}
    " return cleaned_text From 125ff34bc5954b023eeba2634da5e002ab64cd8d Mon Sep 17 00:00:00 2001 From: sasi Date: Wed, 24 Jul 2024 19:34:18 +0530 Subject: [PATCH 4/5] var changes --- v2_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2_utils.py b/v2_utils.py index 5537392..623b9da 100644 --- a/v2_utils.py +++ b/v2_utils.py @@ -99,7 +99,7 @@ def remove_unmatched_tags(text): #For front end renders add ul tags if not cleaned_text.strip().startswith("
      "): - return f"
        {text}
      " + return f"
        {cleaned_text}
      " return cleaned_text From fcaa6fb631f802b68ee7c9d2bbc13a9b9cc2260c Mon Sep 17 00:00:00 2001 From: sasi Date: Thu, 25 Jul 2024 16:01:23 +0530 Subject: [PATCH 5/5] week filter to find all tags --- v2_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/v2_utils.py b/v2_utils.py index 623b9da..5a34a64 100644 --- a/v2_utils.py +++ b/v2_utils.py @@ -115,15 +115,15 @@ def week_data_formatter(html_content, type): try: # Use regex to find week titles (e.g., Week 1, Week 2) and their corresponding task lists - week_matches = re.findall(r'(Week \d+)', html_content) - tasks_per_week = re.split(r'Week \d+', html_content)[1:] # Split the content by weeks and skip the first empty split + week_matches = re.findall(r'Week\s*-?\s*\d+', html_content) + tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:] # Split the content by weeks and skip the first empty split weekly_updates = [] if type == "Learnings": # tasks_per_week = re.split(r'

      Week \d+

      ', html_content)[1:] - tasks_per_week = re.split(r'(<.*?>Week \d+<.*?>)', html_content)[1:] - tasks_per_week = [tasks_per_week[i] for i in range(1, len(tasks_per_week), 2)] + tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:] + tasks_per_week = [tasks_per_week[i] for i in range(0, len(tasks_per_week))] for i, week in enumerate(week_matches): task_list_html = tasks_per_week[i] if i < len(tasks_per_week) else "" weekly_updates.append({