From dd463427ea5110c1949a31f1a27640a883cd5b08 Mon Sep 17 00:00:00 2001
From: sasi <sasikumar.s@cogniverselabs.com>
Date: Mon, 22 Jul 2024 16:02:38 +0530
Subject: [PATCH 1/5] remove unclosed tags

---
 v2_utils.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/v2_utils.py b/v2_utils.py
index 5e97cd5..ed9772d 100644
--- a/v2_utils.py
+++ b/v2_utils.py
@@ -27,28 +27,40 @@ def define_link_data(usernames):
         logging.info(f"{e}---define_link_data")
         return []
         
+
 def remove_unmatched_tags(text):
     try:
-       # Remove unmatched closing tags at the beginning of the string
+        # Remove unmatched closing tags at the beginning of the string
         text = re.sub(r'^\s*</[^>]+>\s*', '', text)
-        
         # Regex pattern to find matched or unmatched tags
-        pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*)', re.DOTALL)
+        pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL)
         matches = pattern.findall(text)
-
+        
         cleaned_text = ''
+        open_tags = []
+        
         for match in matches:
             if match[0]:  # Full matched <tag>...</tag> pairs
                 cleaned_text += match[0]
             elif match[2]:  # Unmatched opening <tag> tags
+                # Add the tag to the list of open tags
+                tag = re.match(r'<([^/][^>]*)>', match[2])
+                if tag:
+                    tag_name = tag.group(1).split()[0]
+                    open_tags.append(tag_name)
                 cleaned_text += match[2]
-        
+
+        # Close any unmatched opening tags
+        while open_tags:
+            tag = open_tags.pop()
+            cleaned_text += f'</{tag}>'
+
         return cleaned_text
+    
     except Exception as e:
         print(e)
         return text
     
-    
 
   
 def week_data_formatter(html_content, type):

From ca8b54ade2650ce37559d681321fa755e20e675a Mon Sep 17 00:00:00 2001
From: sasi <sasikumar.s@cogniverselabs.com>
Date: Tue, 23 Jul 2024 16:21:09 +0530
Subject: [PATCH 2/5] testcases & markdown handler changes

---
 tests.py    | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 v2_utils.py |  46 +++++++++++++++++++-
 2 files changed, 167 insertions(+), 1 deletion(-)
 create mode 100644 tests.py

diff --git a/tests.py b/tests.py
new file mode 100644
index 0000000..f9cf2e6
--- /dev/null
+++ b/tests.py
@@ -0,0 +1,122 @@
+import unittest
+from v2_utils import remove_unmatched_tags
+from app import app
+import json,random
+
+
+class CustomTestResult(unittest.TextTestResult):  # text result that also echoes each passing test
+    def addSuccess(self, test):  # unittest calls this hook when `test` succeeds
+        super().addSuccess(test)  # keep the standard TextTestResult handling
+        print(f"{test._testMethodName} - passed")  # NOTE(review): _testMethodName is a private TestCase attribute
+
+
+class CustomTestRunner(unittest.TextTestRunner):  # runner used by the __main__ guard of this file
+    resultclass = CustomTestResult  # report results through CustomTestResult
+
+
+class TestRemoveUnmatchedTags(unittest.TestCase):
+    """
+    Static input / expected-output cases for remove_unmatched_tags (the markdown handler).
+    """
+    def test_remove_unmatched_tags_basic(self):
+        input_text = "<div>Test content</p></div>"
+        expected_output = "<div>Test content</div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+    
+    def test_remove_unmatched_tags_unmatched_opening(self):
+        input_text = "<div>Test content"
+        expected_output = "<div>Test content</div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+    
+    def test_remove_unmatched_tags_unmatched_closing(self):  # NOTE(review): despite the name, the input has unclosed *opening* tags
+        input_text = "<div><span><p>Test content</div>"
+        expected_output = "<div><span><p>Test content</p></span></div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+    
+    def test_remove_unmatched_tags_nested_tags(self):
+        input_text = "<div><p>Test content</p></p></div>"
+        expected_output = "<div><p>Test content</p></div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_unmatched_nested_opening(self):
+        input_text = "<div><p>Test content</div>"
+        expected_output = "<div><p>Test content</p></div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+    
+    def test_remove_unmatched_tags_unmatched_nested_closing(self):  # NOTE(review): same input and expectation as ..._basic
+        input_text = "<div>Test content</p></div>"
+        expected_output = "<div>Test content</div>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_multiple_unmatched_tags(self):
+        input_text = "<div>Test</div><p>Content</p><span>Here"
+        expected_output = "<div>Test</div><p>Content</p><span>Here</span>"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+
+    def test_remove_unmatched_tags_text_with_no_tags(self):
+        input_text = "Plain text with no tags"
+        expected_output = "Plain text with no tags"
+        self.assertEqual(remove_unmatched_tags(input_text), expected_output)
+    
+    def test_remove_unmatched_tags_empty_string(self):
+        input_text = ""
+        expected_output = ""
+        self.assertEqual(len(remove_unmatched_tags(input_text)),len(expected_output))  # NOTE(review): compares lengths only, not the strings
+        
+
+class TestIssuesEndpoints(unittest.TestCase):
+    """Endpoint checks run through the app's test client; /issues is fetched in setUp."""
+    def setUp(self):
+        self.app = app.test_client()
+        self.app.testing = True
+        self.issues_data = None  # To store issues data for use in subsequent tests
+
+        # Fetch issues data during setup
+        self._fetch_issues_data()
+
+    def _fetch_issues_data(self):
+        # Validate the /issues endpoint and store the issues data
+        response = self.app.get('/issues')
+        self.assertEqual(response.status_code, 200)
+
+        data = json.loads(response.data)
+        self.issues_data = data.get('issues', [])
+        self.assertTrue(len(self.issues_data) > 0, "No issues found in response")
+
+    def test_get_issues_success(self):
+        # Check if issues data is correctly fetched
+        self.assertIsNotNone(self.issues_data, "Issues data is not populated")
+
+    def test_get_issues_detail_success(self):
+        # Ensure the /issues endpoint was successfully called and issues data is available
+        if not self.issues_data:
+            self.skipTest("Skipping detail test as /issues endpoint did not return data")
+
+        # Use a random entry from the /issues response to form the endpoint URL.
+        # randrange(n) can pick any entry and stays valid when only one entry exists.
+        index = random.randrange(len(self.issues_data))
+        sample_issue = self.issues_data[index]['issues'][0]
+        issue_id = sample_issue['id']
+        orgname = self.issues_data[index]['org_name']
+
+        endpoint = f'/v2/issues/{orgname}/{issue_id}'
+
+        response = self.app.get(endpoint)
+        self.assertEqual(response.status_code, 200)
+
+    def test_get_repo_detail_success(self):
+        # Ensure the /issues endpoint was successfully called and issues data is available
+        if not self.issues_data:
+            self.skipTest("Skipping detail test as /issues endpoint did not return data")
+
+        # Use a random entry from the /issues response to form the endpoint URL
+        index = random.randrange(len(self.issues_data))
+        orgname = self.issues_data[index]['org_name']
+        endpoint = f'/issues/{orgname}'
+        response = self.app.get(endpoint)
+        self.assertEqual(response.status_code, 200)
+
+        
+
+if __name__ == '__main__':
+    unittest.main(testRunner=CustomTestRunner())
diff --git a/v2_utils.py b/v2_utils.py
index ed9772d..4c1d89a 100644
--- a/v2_utils.py
+++ b/v2_utils.py
@@ -27,15 +27,53 @@ def define_link_data(usernames):
         logging.info(f"{e}---define_link_data")
         return []
         
+def preprocess_nested_tags(text):
+    """Balance the HTML-like tags in *text* and return the corrected string.
+
+    A closing tag is kept only when it matches the innermost open tag; tags
+    still open at the end are closed innermost-first. Names are compared
+    without attributes or the trailing '>', so '<div class="a">' pairs with
+    '</div>'; self-closing tags such as '<br/>' are never treated as open.
+    Best effort: on any exception the error is printed and *text* returned.
+    """
+    try:
+        tag_stack = []  # names of the tags opened but not yet closed
+        corrected_segments = []
+        # The capturing group makes re.split keep the tags as their own segments.
+        for segment in re.split(r'(<[^>]+>)', text):
+            opening = re.match(r'<([^\s/>]+)[^>]*>', segment)
+            closing = re.match(r'</\s*([^\s>]+)[^>]*>', segment)
+            if opening and not segment.endswith('/>'):
+                tag_stack.append(opening.group(1))
+            elif closing:
+                if not tag_stack or tag_stack[-1] != closing.group(1):
+                    continue  # Ignore unmatched closing tag
+                tag_stack.pop()
+            corrected_segments.append(segment)
+        # Close whatever is still open, innermost tag first.
+        corrected_segments.extend(f'</{name}>' for name in reversed(tag_stack))
+        return ''.join(corrected_segments)
+    except Exception as e:
+        print(e,"error in preprocess_nested_tags function")
+        return text
+    
+    
 
 def remove_unmatched_tags(text):
     try:
+        # Preprocess text to handle unmatched nested tags
+        text = preprocess_nested_tags(text)
+        
         # Remove unmatched closing tags at the beginning of the string
         text = re.sub(r'^\s*</[^>]+>\s*', '', text)
         # Regex pattern to find matched or unmatched tags
         pattern = re.compile(r'(<([^>]+)>.*?</\2>)|(<[^/][^>]*>.*?)(?=<[^/][^>]*>|$)', re.DOTALL)
         matches = pattern.findall(text)
         
+        # If no tags were matched (e.g. plain text), return the text as-is
+        if matches == []:
+            return text
+        
         cleaned_text = ''
         open_tags = []
         
@@ -55,12 +93,18 @@ def remove_unmatched_tags(text):
             tag = open_tags.pop()
             cleaned_text += f'</{tag}>'
 
+        # Remove extra unmatched angle brackets
+        cleaned_text = re.sub(r'>+', '>', cleaned_text)
+        cleaned_text = re.sub(r'<+', '<', cleaned_text)
+
         return cleaned_text
     
     except Exception as e:
         print(e)
         return text
-    
+
+
+
 
   
 def week_data_formatter(html_content, type):

From 56c26e71711659d813a4a9adf882f7c0be014ad0 Mon Sep 17 00:00:00 2001
From: sasi <sasikumar.s@cogniverselabs.com>
Date: Wed, 24 Jul 2024 16:40:10 +0530
Subject: [PATCH 3/5] added ul tags to render correctly in FE

---
 v2_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/v2_utils.py b/v2_utils.py
index 4c1d89a..5537392 100644
--- a/v2_utils.py
+++ b/v2_utils.py
@@ -96,6 +96,10 @@ def remove_unmatched_tags(text):
         # Remove extra unmatched angle brackets
         cleaned_text = re.sub(r'>+', '>', cleaned_text)
         cleaned_text = re.sub(r'<+', '<', cleaned_text)
+        
+        #For front end renders add ul tags 
+        if not cleaned_text.strip().startswith("<ul>"):
+            return f"<ul>{text}</ul>"
 
         return cleaned_text
     

From 125ff34bc5954b023eeba2634da5e002ab64cd8d Mon Sep 17 00:00:00 2001
From: sasi <sasikumar.s@cogniverselabs.com>
Date: Wed, 24 Jul 2024 19:34:18 +0530
Subject: [PATCH 4/5] var changes

---
 v2_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/v2_utils.py b/v2_utils.py
index 5537392..623b9da 100644
--- a/v2_utils.py
+++ b/v2_utils.py
@@ -99,7 +99,7 @@ def remove_unmatched_tags(text):
         
         #For front end renders add ul tags 
         if not cleaned_text.strip().startswith("<ul>"):
-            return f"<ul>{text}</ul>"
+            return f"<ul>{cleaned_text}</ul>"
 
         return cleaned_text
     

From fcaa6fb631f802b68ee7c9d2bbc13a9b9cc2260c Mon Sep 17 00:00:00 2001
From: sasi <sasikumar.s@cogniverselabs.com>
Date: Thu, 25 Jul 2024 16:01:23 +0530
Subject: [PATCH 5/5] week filter to find all tags

---
 v2_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/v2_utils.py b/v2_utils.py
index 623b9da..5a34a64 100644
--- a/v2_utils.py
+++ b/v2_utils.py
@@ -115,15 +115,15 @@ def week_data_formatter(html_content, type):
     
     try:
         # Use regex to find week titles (e.g., Week 1, Week 2) and their corresponding task lists
-        week_matches = re.findall(r'(Week \d+)', html_content)
-        tasks_per_week = re.split(r'Week \d+', html_content)[1:]  # Split the content by weeks and skip the first empty split
+        week_matches = re.findall(r'Week\s*-?\s*\d+', html_content)
+        tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:]  # Split the content by weeks and skip the first empty split
 
         weekly_updates = []
 
         if type == "Learnings":
             # tasks_per_week = re.split(r'<h3>Week \d+</h3>', html_content)[1:]
-            tasks_per_week = re.split(r'(<.*?>Week \d+<.*?>)', html_content)[1:]
-            tasks_per_week = [tasks_per_week[i] for i in range(1, len(tasks_per_week), 2)]
+            tasks_per_week = re.split(r'Week\s*-?\s*\d+', html_content)[1:]
+            tasks_per_week = [tasks_per_week[i] for i in range(0, len(tasks_per_week))]
             for i, week in enumerate(week_matches):
                 task_list_html = tasks_per_week[i] if i < len(tasks_per_week) else ""
                 weekly_updates.append({