From 97100dd8e47f893d2f039afaea8d2ea60c432436 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Mon, 13 Jan 2025 16:45:15 -0500
Subject: [PATCH 01/27] evals: add prefix nba evals

---
 evals/notebook_states.py                      | 59 +++++++++++++
 .../dataframe_transformation_tests.py         | 84 ++++++++++++++++++-
 2 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/evals/notebook_states.py b/evals/notebook_states.py
index 6ee48b24b..8beb90917 100644
--- a/evals/notebook_states.py
+++ b/evals/notebook_states.py
@@ -170,4 +170,63 @@
 }
 
 """, '']
+)
+
+
+nba_players_df = pd.DataFrame({
+    'player_name': [
+        'LeBron James', 'Kevin Durant', 'Stephen Curry', 'Giannis Antetokounmpo',
+        'Kawhi Leonard', 'James Harden', 'Luka Dončić', 'Damian Lillard',
+        'Joel Embiid', 'Nikola Jokić', 'Anthony Davis', 'Chris Paul',
+        'Jayson Tatum', 'Zion Williamson', 'Devin Booker'
+    ],
+    'team': [
+        'Los Angeles Lakers', 'Brooklyn Nets', 'Golden State Warriors', 'Milwaukee Bucks',
+        'Los Angeles Clippers', 'Brooklyn Nets', 'Dallas Mavericks', 'Portland Trail Blazers',
+        'Philadelphia 76ers', 'Denver Nuggets', 'Los Angeles Lakers', 'Phoenix Suns',
+        'Boston Celtics', 'New Orleans Pelicans', 'Phoenix Suns'
+    ],
+    'position': [
+        'SF', 'PF', 'PG', 'PF',
+        'SF', 'SG', 'PG', 'PG',
+        'C', 'C', 'PF', 'PG',
+        'SF', 'PF', 'SG'
+    ],
+    'points_per_game': [
+        25.0, 27.0, 29.0, 28.0,
+        24.0, 25.0, 28.0, 27.0,
+        33.0, 26.0, 22.0, 18.0,
+        26.0, 22.0, 25.0
+    ]
+})
+
+NBA_PLAYERS_NOTEBOOK: NotebookState = NotebookState(
+    global_vars={'nba_players_df': nba_players_df.head(5)},
+    cell_contents=["""import pandas as pd
+nba_players_df = pd.DataFrame({
+    'player_name': [
+        'LeBron James', 'Kevin Durant', 'Stephen Curry', 'Giannis Antetokounmpo',
+        'Kawhi Leonard', 'James Harden', 'Luka Dončić', 'Damian Lillard',
+        'Joel Embiid', 'Nikola Jokić', 'Anthony Davis', 'Chris Paul',
+        'Jayson Tatum', 'Zion Williamson', 'Devin Booker'
+    ],
+    'team': [
+        'Los Angeles Lakers', 'Brooklyn Nets', 'Golden State Warriors', 'Milwaukee Bucks',
+        'Los Angeles Clippers', 'Brooklyn Nets', 'Dallas Mavericks', 'Portland Trail Blazers',
+        'Philadelphia 76ers', 'Denver Nuggets', 'Los Angeles Lakers', 'Phoenix Suns',
+        'Boston Celtics', 'New Orleans Pelicans', 'Phoenix Suns'
+    ],
+    'position': [
+        'SF', 'PF', 'PG', 'PF',
+        'SF', 'SG', 'PG', 'PG',
+        'C', 'C', 'PF', 'PG',
+        'SF', 'PF', 'SG'
+    ],
+    'points_per_game': [
+        25.0, 27.0, 29.0, 28.0,
+        24.0, 25.0, 28.0, 27.0,
+        33.0, 26.0, 22.0, 18.0,
+        26.0, 22.0, 25.0
+    ]
+})""", '']
 )
\ No newline at end of file
diff --git a/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py b/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
index 966e23c8c..d9cebe9cc 100644
--- a/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
@@ -1,6 +1,7 @@
 
 
-from evals.eval_types import InlineCodeCompletionTestCase
+from evals.eval_types import CodeGenTestCaseCore, InlineCodeCompletionTestCase
+from evals.notebook_states import NBA_PLAYERS_NOTEBOOK
 from evals.test_cases.chat_tests.dataframe_transformation_tests import CONVERT_ANNUAL_INCOME_TO_FLOAT, CONVERT_INTEREST_RATE_TO_INT, CONVERT_KILOMETERS_DRIVEN_TO_FLOAT, DATETIME_CONVERSION, EXTRACT_YEAR_FROM_STRING_DATE, FILTER_ANNUAL_INCOME_AND_LOAN_CONDITION, FILTER_ANNUAL_INCOME_GREATER_THAN_100K, NUMBER_OF_BMW_FORD_TOYOTA_FIRST_OWNER_FUNCTION, REPLACE_UNDERSCORE_WITH_SPACE_IN_COLUMN_NAMES, SEPARATE_DATA_BY_COLUMN_VALUE, WEIGHTED_AVERAGE_INTEREST_RATE
 
 
@@ -289,5 +290,84 @@
         type_tags=['code_completion'],
     ),
 
-
+    InlineCodeCompletionTestCase(
+        name="nba_players_follow_prefix_pattern",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=NBA_PLAYERS_NOTEBOOK,
+            expected_code="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+bucks_players = nba_players_df[nba_players_df['team'] == 'Milwaukee Bucks']
+""",
+            workflow_tags=["df_transformation", "pandas"],
+        ),
+        prefix="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+buck""",
+        suffix="""""",
+        type_tags=['code_completion'],
+    ),
+        InlineCodeCompletionTestCase(
+        name="nba_players_follow_prefix_pattern_bucks",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=NBA_PLAYERS_NOTEBOOK,
+            expected_code="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+bucks_players = nba_players_df[nba_players_df['team'] == 'Milwaukee Bucks']
+""",
+            workflow_tags=["df_transformation", "pandas"],
+        ),
+        prefix="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+""",
+        suffix="""""",
+        type_tags=['code_completion'],
+    ),
+    InlineCodeCompletionTestCase(
+        name="nba_players_follow_prefix_pattern_mavs",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=NBA_PLAYERS_NOTEBOOK,
+            expected_code="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+mavericks_players = nba_players_df[nba_players_df['team'] == 'Dallas Mavericks']
+""",
+            workflow_tags=["df_transformation", "pandas"],
+        ),
+        prefix="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+mav
+""",
+        suffix="""""",
+        type_tags=['code_completion'],
+    ),
+    InlineCodeCompletionTestCase(
+        name="nba_players_follow_prefix_pattern_warrios",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=NBA_PLAYERS_NOTEBOOK,
+            expected_code="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+""",
+            workflow_tags=["df_transformation", "pandas"],
+        ),
+        prefix="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warr
+""",
+        suffix="""""",
+        type_tags=['code_completion'],
+    ),
 ]
\ No newline at end of file

From edd58df0eade6f78d6fa8d49c84c4a871c829224 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Mon, 13 Jan 2025 16:50:03 -0500
Subject: [PATCH 02/27] evals: add print tests

---
 .../misc_tests.py                             | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/evals/test_cases/inline_code_completion_tests/misc_tests.py b/evals/test_cases/inline_code_completion_tests/misc_tests.py
index 9a1776b8b..ec4567a10 100644
--- a/evals/test_cases/inline_code_completion_tests/misc_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/misc_tests.py
@@ -14,4 +14,42 @@
         suffix="""""",
         type_tags=["no_expressed_intent"],
     ),
+    InlineCodeCompletionTestCase(
+        name="print_hi_with_prefix",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="print('Hi')",
+            workflow_tags=["misc"],
+        ),
+        prefix="""#Print 'Hi'
+pri""",
+        suffix="""""",
+        type_tags=["comment_following"],
+    ),
+    InlineCodeCompletionTestCase(
+        name="print_hi_most_of_line",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="print('Hi')",
+            workflow_tags=["misc"],
+        ),
+        prefix="""#Print 'Hi'
+print('Hi""",
+        suffix="""""",
+        type_tags=["comment_following"],
+    ),
+    # No expressed intent left in the line
+    InlineCodeCompletionTestCase(
+        name="print_hi_finished_line",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="print('Hi')",
+            workflow_tags=["misc"],
+        ),
+        prefix="""#Print 'Hi'
+print('Hi')""",
+        suffix="""""",
+        type_tags=["comment_following"],
+    ),
+
 ]

From 46528aa525389bbeb6a669116234095e94d38572 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Mon, 13 Jan 2025 17:27:30 -0500
Subject: [PATCH 03/27] evals: strip prefix starting line

---
 evals/test_runners/code_gen_test_runner.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/evals/test_runners/code_gen_test_runner.py b/evals/test_runners/code_gen_test_runner.py
index f55afcf13..3d28852c9 100644
--- a/evals/test_runners/code_gen_test_runner.py
+++ b/evals/test_runners/code_gen_test_runner.py
@@ -88,6 +88,21 @@ def run_code_gen_test(
         # We always add a newline between the current_cell_contents and the prefix. 
         # But we don't add a newline between the prefix -> ai_generated_code -> suffix, 
         # because the inline code completion can occur in the middle of a line. 
+
+        def strip_last_line_of_prefix_from_ai_generated_code(prefix: str, ai_generated_code: str) -> str:
+            # Remove the last line of the prefix
+            prefix_lines = prefix.split("\n")
+            last_prefix_line = prefix_lines[-1]
+
+            # If the ai_generated_code starts with the prefix_line, remove it
+            if ai_generated_code.startswith(last_prefix_line):
+                ai_generated_code = ai_generated_code[len(last_prefix_line):]
+
+            return ai_generated_code
+
+
+        ai_generated_code = strip_last_line_of_prefix_from_ai_generated_code(test.prefix or "", ai_generated_code)
+
         actual_code = current_cell_contents_script + "\n" + (test.prefix or "") + ai_generated_code + (test.suffix or "")
 
     # Execute the code and check if they produce the same results

From 83c0e4712ea8551c5876960dc358965d591b0652 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 11:23:44 -0500
Subject: [PATCH 04/27] evals: remove blank line from prefix

---
 .../dataframe_transformation_tests.py                       | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py b/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
index d9cebe9cc..b04a404ae 100644
--- a/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
@@ -346,8 +346,7 @@
 lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
 nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
 warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
-mav
-""",
+mav""",
         suffix="""""",
         type_tags=['code_completion'],
     ),
@@ -365,8 +364,7 @@
         prefix="""
 lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
 nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
-warr
-""",
+warr""",
         suffix="""""",
         type_tags=['code_completion'],
     ),

From 760ecbf40b0ddea9aac60041e04f5e6d3eb8e232 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 11:29:09 -0500
Subject: [PATCH 05/27] evals: strip last line of prefix

---
 evals/test_runners/code_gen_test_runner.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/evals/test_runners/code_gen_test_runner.py b/evals/test_runners/code_gen_test_runner.py
index 3d28852c9..cebdb40da 100644
--- a/evals/test_runners/code_gen_test_runner.py
+++ b/evals/test_runners/code_gen_test_runner.py
@@ -85,24 +85,28 @@ def run_code_gen_test(
     if test.test_type == 'chat':
         actual_code = current_cell_contents_script + "\n" + ai_generated_code
     else:
-        # We always add a newline between the current_cell_contents and the prefix. 
-        # But we don't add a newline between the prefix -> ai_generated_code -> suffix, 
-        # because the inline code completion can occur in the middle of a line. 
-
+        
         def strip_last_line_of_prefix_from_ai_generated_code(prefix: str, ai_generated_code: str) -> str:
             # Remove the last line of the prefix
             prefix_lines = prefix.split("\n")
             last_prefix_line = prefix_lines[-1]
 
             # If the ai_generated_code starts with the prefix_line, remove it
-            if ai_generated_code.startswith(last_prefix_line):
+            # This is actually not enough. This only handles the case where the entire last line of the prefix
+            # is repeated. However, in an eval like `convert_annual_income_to_float_no_comment`, the test fails
+            # because the AI repeats the last two letters of prefix, writing out the code 'flfloat' instead of 'float'.
+            if ai_generated_code.startswith(last_prefix_line) and last_prefix_line != "":
+                print(f"STRIPPING {last_prefix_line} from {ai_generated_code}")
                 ai_generated_code = ai_generated_code[len(last_prefix_line):]
+                print(f"RESULT: {ai_generated_code}")
 
             return ai_generated_code
 
-
         ai_generated_code = strip_last_line_of_prefix_from_ai_generated_code(test.prefix or "", ai_generated_code)
 
+        # We always add a newline between the current_cell_contents and the prefix. 
+        # But we don't add a newline between the prefix -> ai_generated_code -> suffix, 
+        # because the inline code completion can occur in the middle of a line. 
         actual_code = current_cell_contents_script + "\n" + (test.prefix or "") + ai_generated_code + (test.suffix or "")
 
     # Execute the code and check if they produce the same results

From 495e5d664cbd372b82c5f0d30fd4de262cdba952 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 11:35:12 -0500
Subject: [PATCH 06/27] evals: fix code_pattern_following_small_prefix eval

---
 .../test_cases/inline_code_completion_tests/variable_tests.py  | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/evals/test_cases/inline_code_completion_tests/variable_tests.py b/evals/test_cases/inline_code_completion_tests/variable_tests.py
index b97bccace..0c0499c4d 100644
--- a/evals/test_cases/inline_code_completion_tests/variable_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/variable_tests.py
@@ -84,8 +84,7 @@
             workflow_tags=["variable_declaration"],
         ),
         prefix="""
-a
-""",
+a""",
         suffix="""
 b = 2 # set b to 2
 c = 3 # set c to 3

From 06bfa2aaafec39148930b651b1416b2307e500d0 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 11:39:33 -0500
Subject: [PATCH 07/27] evals: fix bugs in prompt

---
 .../inline_code_completion_prompts/prod_prompt_v3.py        | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/evals/prompts/inline_code_completion_prompts/prod_prompt_v3.py b/evals/prompts/inline_code_completion_prompts/prod_prompt_v3.py
index 8a6cb1fc1..d9ad1c3ff 100644
--- a/evals/prompts/inline_code_completion_prompts/prod_prompt_v3.py
+++ b/evals/prompts/inline_code_completion_prompts/prod_prompt_v3.py
@@ -56,12 +56,12 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
 
 Code in the active code cell:
 ```python
-df['age'] = df[df['age'] > 23<cursor>]
+df['age'] = df[<cursor>['age'] > 23]
 ```
 
 Output:
 ```python
-]
+df
 ```
 </Example 2>
 
@@ -77,6 +77,8 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
 
 Output:
 ```python
+
+x=1
 ```
 </Example 3>
 

From c971710dfab89f54b6cd6a4b072dd9c2c45a4167 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 12:56:10 -0500
Subject: [PATCH 08/27] evals: add new inline code completion prompt

---
 evals/eval_types.py                           |   5 +
 .../__init__.py                               |   2 +
 .../prod_prompt_v4.py                         | 142 ++++++++++++++++++
 3 files changed, 149 insertions(+)
 create mode 100644 evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py

diff --git a/evals/eval_types.py b/evals/eval_types.py
index 5c2df076a..2bd9ba0e5 100644
--- a/evals/eval_types.py
+++ b/evals/eval_types.py
@@ -114,6 +114,10 @@ class InlineCodeCompletionPromptGenerator():
 
     def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) -> str:
         raise NotImplementedError("Subclasses must implement this method")
+ 
+    def post_process_output(self, output: str, prefix: str, suffix: str) -> str:
+        # Hook for cleaning up the raw model output; subclasses may override it. The default returns the output unchanged.
+        return output
 
 class DebugPromptGenerator():
 
@@ -121,3 +125,4 @@ class DebugPromptGenerator():
 
     def get_prompt(self, error_message: str, notebook_state: NotebookState) -> str:
         raise NotImplementedError("Subclasses must implement this method")
+    
diff --git a/evals/prompts/inline_code_completion_prompts/__init__.py b/evals/prompts/inline_code_completion_prompts/__init__.py
index a47ef7b0d..d2f031ac0 100644
--- a/evals/prompts/inline_code_completion_prompts/__init__.py
+++ b/evals/prompts/inline_code_completion_prompts/__init__.py
@@ -2,10 +2,12 @@
 from evals.prompts.inline_code_completion_prompts.prod_prompt_v1 import prod_prompt_v1
 from evals.prompts.inline_code_completion_prompts.prod_prompt_v2 import prod_prompt_v2
 from evals.prompts.inline_code_completion_prompts.prod_prompt_v3 import prod_prompt_v3
+from evals.prompts.inline_code_completion_prompts.prod_prompt_v4 import prod_prompt_v4
 
 
 INLINE_CODE_COMPLETION_PROMPT_GENERATORS = [
     prod_prompt_v1,
     prod_prompt_v2,
     prod_prompt_v3,
+    prod_prompt_v4,
 ]
diff --git a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
new file mode 100644
index 000000000..67b06d0bc
--- /dev/null
+++ b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
@@ -0,0 +1,142 @@
+from evals.eval_types import InlineCodeCompletionPromptGenerator, NotebookState, ChatPromptGenerator
+
+__all__ = ['prod_prompt_v4']
+
+# This prompt makes one big strategy change: instead of asking the AI to fill in only the gap between the prefix and the suffix,
+# it asks the AI to return the full line of code that matches the user's intent. We then post-process the AI's output
+# so that it does not repeat the last line of the prefix or the first line of the suffix.
+
+class _ProdPromptV4(InlineCodeCompletionPromptGenerator):
+    prompt_name = "prod_prompt_v4"
+
+    def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) -> str:
+        # The whole prompt is one f-string, so literal braces in the examples are doubled; only the final "Your Task" section interpolates values.
+        return f"""You are a coding assistant that lives inside of JupyterLab. Your job is to help the user write code. 
+
+You're given the current code cell, the user's cursor position, and the variables defined in the notebook. The user's cursor is signified by the symbol <cursor>.
+
+CRITICAL FORMATTING RULES:
+1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
+2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
+3. Your response must preserve correct Python indentation and spacing
+
+Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.
+
+<Example 1>
+Defined Variables: {{
+    'loan_multiplier': 1.5,
+    'sales_df': pd.DataFrame({{
+        'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
+        'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
+        'units_sold': [1, 2, 1, 4, 5],
+        'total_price': [10, 19.98, 13.99, 84.00, 500]
+    }})
+}}
+
+Code in the active code cell:
+```python
+import pandas as pd
+sales_df = pd.read_csv('./sales.csv')
+
+# Multiply the total_price column by the loan_multiplier<cursor>
+```
+
+Output:
+```python
+
+sales_df['total_price'] = sales_df['total_price'] * loan_multiplier
+```
+</Example 1>
+
+IMPORTANT: Notice in Example 1 that the output starts with a newline because the cursor was at the end of a comment. This newline is REQUIRED to maintain proper Python formatting.
+
+<Example 2>
+Defined Variables: {{
+    df: pd.DataFrame({{
+        'age': [20, 25, 22, 23, 29],
+        'name': ['Nawaz', 'Aaron', 'Charlie', 'Tamir', 'Eve'],
+    }})
+}}
+
+Code in the active code cell:
+```python
+df['age'] = df[<cursor>['age'] > 23]
+```
+
+Output:
+```python
+df['age'] = df[df['age'] > 23]
+```
+</Example 2>
+
+IMPORTANT: Notice in Example 2 that the output does NOT start with a newline because the cursor is in the middle of existing code.
+
+<Example 3>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+voters = pd.read_csv('./voters.csv')
+
+# Create a variable for pennsylvania voters, ohio voters, california voters, and texas voters
+pa_voters = voters[voters['state'] == 'PA']
+ohio_voters<cursor>
+```
+
+Output:
+```python
+ohio_voters = voters[voters['state'] == 'OH']
+ca_voters = voters[voters['state'] == 'CA']
+tx_voters = voters[voters['state'] == 'TX']
+```
+
+IMPORTANT: Notice in Example 3 that output does not start with a newline character because it wasnts to continue the line of code that the user started. Also notice the output contains three lines of code because that is the minimal code to achieve the user's intent.
+
+</Example 3>
+
+<Example 4>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+# Display the first 5 rows of the dataframe
+df.head()
+<cursor>
+```
+
+Output:
+```python
+```
+</Example 4>
+
+IMPORTANT: Notice in Example 4 that the output is empty becuase the user's intent is already complete.
+
+Your Task:
+
+Defined Variables: {notebook_state.global_vars}
+
+Code in the active code cell:
+```python
+{prefix}<cursor>{suffix}
+```
+
+Output:
+"""
+    
+    def post_process_output(self, output: str, prefix: str, suffix: str) -> str:
+        # Trim text the model repeated from around the cursor, so that prefix + output + suffix does not duplicate it.
+        last_prefix_line = prefix.split("\n")[-1]
+        if output.startswith(last_prefix_line) and last_prefix_line != "":
+            # The output begins with the prefix's last line; drop that repeated text from the front.
+            output = output[len(last_prefix_line):]
+
+        first_suffix_line = suffix.split("\n")[0]
+        if output.endswith(first_suffix_line) and first_suffix_line != "":
+            # The output ends with the suffix's first line; drop that repeated text from the end.
+            output = output[:-len(first_suffix_line)]
+
+        return output
+
+prod_prompt_v4 = _ProdPromptV4()
+
+

From 00106f161ddd2261dd0a0934685e03f4c65dd26b Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 12:56:31 -0500
Subject: [PATCH 09/27] evals: fix broken evals

---
 .../dataframe_transformation_tests.py         | 33 ++++++++++++++++---
 evals/test_runners/code_gen_test_runner.py    | 22 ++-----------
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py b/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
index b04a404ae..e5ee23adf 100644
--- a/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/dataframe_transformation_tests.py
@@ -291,7 +291,7 @@
     ),
 
     InlineCodeCompletionTestCase(
-        name="nba_players_follow_prefix_pattern",
+        name="nba_players_follow_prefix_pattern_bucks",
         test_case_core=CodeGenTestCaseCore(
             notebook_state=NBA_PLAYERS_NOTEBOOK,
             expected_code="""
@@ -301,6 +301,7 @@
 bucks_players = nba_players_df[nba_players_df['team'] == 'Milwaukee Bucks']
 """,
             workflow_tags=["df_transformation", "pandas"],
+            variables_to_compare=["lakers_players", "nets_players", "warriors_players", "bucks_players"],
         ),
         prefix="""
 lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
@@ -311,22 +312,23 @@
         type_tags=['code_completion'],
     ),
         InlineCodeCompletionTestCase(
-        name="nba_players_follow_prefix_pattern_bucks",
+        name="nba_players_follow_prefix_pattern_clippers",
         test_case_core=CodeGenTestCaseCore(
             notebook_state=NBA_PLAYERS_NOTEBOOK,
             expected_code="""
 lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
 nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
 warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
-bucks_players = nba_players_df[nba_players_df['team'] == 'Milwaukee Bucks']
+clippers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Clippers']
 """,
             workflow_tags=["df_transformation", "pandas"],
+            variables_to_compare=["lakers_players", "nets_players", "warriors_players", "clippers_players"],
         ),
         prefix="""
 lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
 nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
 warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
-""",
+clip""",
         suffix="""""",
         type_tags=['code_completion'],
     ),
@@ -341,6 +343,7 @@
 mavericks_players = nba_players_df[nba_players_df['team'] == 'Dallas Mavericks']
 """,
             workflow_tags=["df_transformation", "pandas"],
+            variables_to_compare=["lakers_players", "nets_players", "warriors_players", "mavericks_players"],
         ),
         prefix="""
 lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
@@ -360,6 +363,7 @@
 warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
 """,
             workflow_tags=["df_transformation", "pandas"],
+            variables_to_compare=["lakers_players", "nets_players", "warriors_players"],
         ),
         prefix="""
 lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
@@ -368,4 +372,23 @@
         suffix="""""",
         type_tags=['code_completion'],
     ),
-]
\ No newline at end of file
+    InlineCodeCompletionTestCase(
+        name="nba_players_follow_suffix_pattern_warriors",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=NBA_PLAYERS_NOTEBOOK,
+            expected_code="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+warriors_players = nba_players_df[nba_players_df['team'] == 'Golden State Warriors']
+""",
+            workflow_tags=["df_transformation", "pandas"],
+            variables_to_compare=["lakers_players", "nets_players", "warriors_players"],
+        ),
+        prefix="""
+lakers_players = nba_players_df[nba_players_df['team'] == 'Los Angeles Lakers']
+nets_players = nba_players_df[nba_players_df['team'] == 'Brooklyn Nets']
+""",
+        suffix="""nba_players_df[nba_players_df['team'] == 'Golden State Warriors']""",
+        type_tags=['code_completion'],
+    ),
+]
diff --git a/evals/test_runners/code_gen_test_runner.py b/evals/test_runners/code_gen_test_runner.py
index cebdb40da..216922f33 100644
--- a/evals/test_runners/code_gen_test_runner.py
+++ b/evals/test_runners/code_gen_test_runner.py
@@ -82,27 +82,11 @@ def run_code_gen_test(
     ai_generated_code = get_open_ai_completion(prompt)
 
     # Construct the actual code
-    if test.test_type == 'chat':
+    if isinstance(prompt_generator, ChatPromptGenerator):
         actual_code = current_cell_contents_script + "\n" + ai_generated_code
     else:
-        
-        def strip_last_line_of_prefix_from_ai_generated_code(prefix: str, ai_generated_code: str) -> str:
-            # Remove the last line of the prefix
-            prefix_lines = prefix.split("\n")
-            last_prefix_line = prefix_lines[-1]
-
-            # If the ai_generated_code starts with the prefix_line, remove it
-            # This is actually not enough. This only handles the case where the entire last line of the prefix
-            # is repeated. However, in an eval like `convert_annual_income_to_float_no_comment`, the test fails
-            # because the AI repeats the last two letters of prefix, writing out the code 'flfloat' instead of 'float'.
-            if ai_generated_code.startswith(last_prefix_line) and last_prefix_line != "":
-                print(f"STRIPPING {last_prefix_line} from {ai_generated_code}")
-                ai_generated_code = ai_generated_code[len(last_prefix_line):]
-                print(f"RESULT: {ai_generated_code}")
-
-            return ai_generated_code
-
-        ai_generated_code = strip_last_line_of_prefix_from_ai_generated_code(test.prefix or "", ai_generated_code)
+        # Run the post-processing function
+        ai_generated_code = prompt_generator.post_process_output(ai_generated_code, test.prefix or "", test.suffix or "")
 
         # We always add a newline between the current_cell_contents and the prefix. 
         # But we don't add a newline between the prefix -> ai_generated_code -> suffix, 

From 48ec923bb22ecf4ca17bd6264556cbed7fe38f7e Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 16:52:01 -0500
Subject: [PATCH 10/27] mito-ai: strip suffix and prefix

---
 .../Extensions/InlineCompleter/provider.ts    | 34 +++++++++++++++++--
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/mito-ai/src/Extensions/InlineCompleter/provider.ts b/mito-ai/src/Extensions/InlineCompleter/provider.ts
index b387e94b4..b7bf2574c 100644
--- a/mito-ai/src/Extensions/InlineCompleter/provider.ts
+++ b/mito-ai/src/Extensions/InlineCompleter/provider.ts
@@ -179,6 +179,7 @@ export class MitoAIInlineCompleter
 
       const prefix = this._getPrefix(request);
       const suffix = this._getSuffix(request);
+
       const variables = this._variableManager.variables;
       const prompt = createInlinePrompt(prefix, suffix, variables);
       const openAIFormattedMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -210,7 +211,7 @@ export class MitoAIInlineCompleter
       return {
         items: result.items.map(item => ({
           ...item,
-          insertText: this._cleanCompletion(item.content)
+          insertText: this._cleanCompletion(item.content, prefix, suffix)
         }))
       };
     } finally {
@@ -342,14 +343,41 @@ export class MitoAIInlineCompleter
     });
   }
 
-  private _cleanCompletion(rawCompletion: string) {
-    return rawCompletion
+  private _cleanCompletion(rawCompletion: string, prefix?: string, suffix?: string) {
+    let cleanedCompletion = rawCompletion
       .replace(/^```python\n?/, '')  // Remove opening code fence with optional python language
       .replace(/```$/, '')           // Remove closing code fence
       .replace(/\n$/, '')            // Remove trailing newline
 
+    console.log("IN CLEANING")
+    console.log('prefix', prefix)
+    console.log('suffix', suffix)
+
+    if (prefix) {
+      // Remove duplicate prefix content
+      const lastPrefixLine = prefix.split('\n').slice(-1)[0];
+      if (cleanedCompletion.startsWith(lastPrefixLine) && lastPrefixLine !== '') {
+        console.log(`Removing Prefix: ${lastPrefixLine} from ${cleanedCompletion}`)
+        cleanedCompletion = cleanedCompletion.slice(lastPrefixLine.length);
+      }
+
+    }
+
+    if (suffix) {
+      // Remove duplicate suffix content
+      const firstSuffixLine = suffix.split('\n')[0];
+      if (cleanedCompletion.endsWith(firstSuffixLine) && firstSuffixLine !== '') {
+        console.log(`Removing Suffix: ${firstSuffixLine} from ${cleanedCompletion}`)
+        cleanedCompletion = cleanedCompletion.slice(0, -firstSuffixLine.length);
+      }
+    }
+
+    console.log(cleanedCompletion)
+    return cleanedCompletion;
   }
 
+
+
   private _resetCurrentStream() {
     this._currentToken = '';
     if (this._currentStream) {

From 1448610b9460438a5a08de039f2567c9be65ed1b Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 17:02:06 -0500
Subject: [PATCH 11/27] mito-ai: update prompt

---
 .../Extensions/InlineCompleter/provider.ts    |  8 ----
 mito-ai/src/prompts/InlinePrompt.tsx          | 47 ++++++++++++++-----
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/mito-ai/src/Extensions/InlineCompleter/provider.ts b/mito-ai/src/Extensions/InlineCompleter/provider.ts
index b7bf2574c..2742a20e1 100644
--- a/mito-ai/src/Extensions/InlineCompleter/provider.ts
+++ b/mito-ai/src/Extensions/InlineCompleter/provider.ts
@@ -349,30 +349,22 @@ export class MitoAIInlineCompleter
       .replace(/```$/, '')           // Remove closing code fence
       .replace(/\n$/, '')            // Remove trailing newline
 
-    console.log("IN CLEANING")
-    console.log('prefix', prefix)
-    console.log('suffix', suffix)
-
     if (prefix) {
       // Remove duplicate prefix content
       const lastPrefixLine = prefix.split('\n').slice(-1)[0];
       if (cleanedCompletion.startsWith(lastPrefixLine) && lastPrefixLine !== '') {
-        console.log(`Removing Prefix: ${lastPrefixLine} from ${cleanedCompletion}`)
         cleanedCompletion = cleanedCompletion.slice(lastPrefixLine.length);
       }
-
     }
 
     if (suffix) {
       // Remove duplicate suffix content
       const firstSuffixLine = suffix.split('\n')[0];
       if (cleanedCompletion.endsWith(firstSuffixLine) && firstSuffixLine !== '') {
-        console.log(`Removing Suffix: ${firstSuffixLine} from ${cleanedCompletion}`)
         cleanedCompletion = cleanedCompletion.slice(0, -firstSuffixLine.length);
       }
     }
 
-    console.log(cleanedCompletion)
     return cleanedCompletion;
   }
 
diff --git a/mito-ai/src/prompts/InlinePrompt.tsx b/mito-ai/src/prompts/InlinePrompt.tsx
index 2b0e222c9..db27695a3 100644
--- a/mito-ai/src/prompts/InlinePrompt.tsx
+++ b/mito-ai/src/prompts/InlinePrompt.tsx
@@ -5,15 +5,14 @@ export function createInlinePrompt(
     suffix: string,
     variables: Variable[]
 ): string {
-    const prompt = `You are a code completion assistant that lives inside of JupyterLab. Your job is to predict the rest of the code that the user has started to write.
+    const prompt = `You are a coding assistant that lives inside of JupyterLab. Your job is to help the user write code. 
 
 You're given the current code cell, the user's cursor position, and the variables defined in the notebook. The user's cursor is signified by the symbol <cursor>.
-
+    
 CRITICAL FORMATTING RULES:
-1. If the cursor appears at the end of a complete line (especially after a comment), ALWAYS start your code with a newline character
-2. If the cursor appears at the end of a function definition, ALWAYS start your code with a newline character
-3. If the cursor appears in the middle of existing code or in an incomplete line of code, do NOT add any newline characters
-4. Your response must preserve correct Python indentation and spacing
+1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
+2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
+3. Your response must preserve correct Python indentation and spacing
 
 Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.
 
@@ -55,12 +54,12 @@ Defined Variables: {{
 
 Code in the active code cell:
 \`\`\`python
-df['age'] = df[df['age'] > 23<cursor>]
+df['age'] = df[<cursor>['age'] > 23]
 \`\`\`
 
 Output:
 \`\`\`python
-]
+df['age'] = df[df['age'] > 23]
 \`\`\`
 </Example 2>
 
@@ -71,19 +70,45 @@ Defined Variables: {{}}
 
 Code in the active code cell:
 \`\`\`python
-# Create a variable x and set it equal to 1<cursor>
+voters = pd.read_csv('./voters.csv')
+
+# Create a variable for pennsylvania voters, ohio voters, california voters, and texas voters
+pa_voters = voters[voters['state'] == 'PA']
+ohio_voters<cursor>
 \`\`\`
 
 Output:
 \`\`\`python
+ohio_voters = voters[voters['state'] == 'OH']
+ca_voters = voters[voters['state'] == 'CA']
+tx_voters = voters[voters['state'] == 'TX']
 \`\`\`
+
+IMPORTANT: Notice in Example 3 that output does not start with a newline character because it wants to continue the line of code that the user started. Also notice the output contains three lines of code because that is the minimal code to achieve the user's intent.
+
 </Example 3>
 
-IMPORTANT: Notice in Example 3 that the output starts with a newline because the cursor appears at the end of a comment line.
+<Example 4>
+Defined Variables: {{}}
+
+Code in the active code cell:
+\`\`\`python
+# Display the first 5 rows of the dataframe
+df.head()
+<cursor>
+\`\`\`
+
+Output:
+\`\`\`python
+\`\`\`
+</Example 4>
+
+IMPORTANT: Notice in Example 4 that the output is empty becuase the user's intent is already complete.
 
 Your Task:
 
-Defined Variables: ${variables?.map(variable => `${JSON.stringify(variable, null, 2)}\n`).join('')}
+Defined Variables: 
+${variables?.map(variable => `${JSON.stringify(variable, null, 2)}\n`).join('')}
 
 Code in the active code cell:
 \`\`\`python

From f34a2fd5ae2ba7d1dcdac1e1a0e1fd9c0a0eab33 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Tue, 14 Jan 2025 17:02:27 -0500
Subject: [PATCH 12/27] evals: update evals

---
 evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py | 2 +-
 evals/test_cases/inline_code_completion_tests/loops.py         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
index 67b06d0bc..39983a264 100644
--- a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
+++ b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
@@ -127,7 +127,7 @@ def post_process_output(self, output: str, prefix: str, suffix: str) -> str:
 
         last_prefix_line = prefix.split("\n")[-1]
         if output.startswith(last_prefix_line) and last_prefix_line != "":
-        # Remove the last line of the prefix if it is the same as the first line of the output
+            # Remove the last line of the prefix if it is the same as the first line of the output
             output = output[len(last_prefix_line):]
 
         first_suffix_line = suffix.split("\n")[0]
diff --git a/evals/test_cases/inline_code_completion_tests/loops.py b/evals/test_cases/inline_code_completion_tests/loops.py
index 1e25abfe7..d39fee05f 100644
--- a/evals/test_cases/inline_code_completion_tests/loops.py
+++ b/evals/test_cases/inline_code_completion_tests/loops.py
@@ -155,6 +155,7 @@ def calculate_percent_change(current_value, previous_value):
         suffix="""
     current_close = returns[year]['current_close']
     previous_close = returns[year]['previous_close']
+    percent_changes.append(calculate_percent_change(current_close, previous_close))
 """,
         type_tags=["code_completion"],
     ),

From 01b5e21a38a8ab46aef28df830276bacb8176a41 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Wed, 15 Jan 2025 17:12:39 -0500
Subject: [PATCH 13/27] mito-ai: add debug messages

---
 mito-ai/src/Extensions/InlineCompleter/provider.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mito-ai/src/Extensions/InlineCompleter/provider.ts b/mito-ai/src/Extensions/InlineCompleter/provider.ts
index 2742a20e1..b83a6ea15 100644
--- a/mito-ai/src/Extensions/InlineCompleter/provider.ts
+++ b/mito-ai/src/Extensions/InlineCompleter/provider.ts
@@ -347,7 +347,10 @@ export class MitoAIInlineCompleter
     let cleanedCompletion = rawCompletion
       .replace(/^```python\n?/, '')  // Remove opening code fence with optional python language
       .replace(/```$/, '')           // Remove closing code fence
-      .replace(/\n$/, '')            // Remove trailing newline
+      .replace(/\n$/, '')    
+      
+    console.log('prefix', prefix)
+    console.log('suffix', suffix)
 
     if (prefix) {
       // Remove duplicate prefix content

From d914355910f2980db65e006fa12aa67f3d8ed5a2 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Wed, 15 Jan 2025 17:14:00 -0500
Subject: [PATCH 14/27] mito-ai: add more console.logs

---
 mito-ai/src/Extensions/InlineCompleter/provider.ts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mito-ai/src/Extensions/InlineCompleter/provider.ts b/mito-ai/src/Extensions/InlineCompleter/provider.ts
index b83a6ea15..dfc167052 100644
--- a/mito-ai/src/Extensions/InlineCompleter/provider.ts
+++ b/mito-ai/src/Extensions/InlineCompleter/provider.ts
@@ -208,6 +208,7 @@ export class MitoAIInlineCompleter
         );
       }
 
+      console.log('calling _cleanCompletion from fetch')
       return {
         items: result.items.map(item => ({
           ...item,
@@ -327,6 +328,7 @@ export class MitoAIInlineCompleter
     fullCompletion += chunk.chunk.content;
     this._fullCompletionMap.set(this._currentStream, fullCompletion);
 
+    console.log('calling _cleanCompletion from stream')
     let cleanedCompletion = this._cleanCompletion(fullCompletion);
 
     this._currentStream.emit({

From 7e241522d0abcb5d22bae71ec05a9449fa9da039 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Wed, 15 Jan 2025 17:23:37 -0500
Subject: [PATCH 15/27] mito-ai: turn off streaming for inline completer

---
 mito-ai/src/Extensions/InlineCompleter/provider.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mito-ai/src/Extensions/InlineCompleter/provider.ts b/mito-ai/src/Extensions/InlineCompleter/provider.ts
index dfc167052..c8f13b2bf 100644
--- a/mito-ai/src/Extensions/InlineCompleter/provider.ts
+++ b/mito-ai/src/Extensions/InlineCompleter/provider.ts
@@ -188,7 +188,7 @@ export class MitoAIInlineCompleter
       const result = await this._client.sendMessage({
         messages: openAIFormattedMessages,
         message_id: messageId.toString(),
-        stream: true,
+        stream: false,
         type: 'inline_completion',
       });
 

From 31fd7e5cb76bb8d31551ba8aa60cdcc4459ea8ea Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 09:54:21 -0500
Subject: [PATCH 16/27] evals: create new evals for indented code

---
 .../prod_prompt_v4.py                         | 23 +++++-
 .../function_tests.py                         | 71 +++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
index 39983a264..0b0d5c01e 100644
--- a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
+++ b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
@@ -18,7 +18,7 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
 CRITICAL FORMATTING RULES:
 1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
 2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
-3. Your response must preserve correct Python indentation and spacing
+3. Your response must preserve correct Python indentation and spacing. For example, if you're completing a line of indented code, you must preserve the indentation.
 
 Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.
 
@@ -111,6 +111,27 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
 
 IMPORTANT: Notice in Example 4 that the output is empty becuase the user's intent is already complete.
 
+<Example 5>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+def even_and_odd():
+    for i in range(10):
+        if i % 2 == 0:
+            print(f"Even")
+        else:
+            pri<cursor>
+```
+
+Output:
+```python
+            print(f"Odd")
+```
+</Example 5>
+
+IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.
+
 Your Task:
 
 Defined Variables: {notebook_state.global_vars}
diff --git a/evals/test_cases/inline_code_completion_tests/function_tests.py b/evals/test_cases/inline_code_completion_tests/function_tests.py
index 35a60b664..d5f40fa71 100644
--- a/evals/test_cases/inline_code_completion_tests/function_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/function_tests.py
@@ -41,6 +41,77 @@ def my_sum(a, b):
 """,
         type_tags=["code_completion"],
     ),
+
+    InlineCodeCompletionTestCase(
+        name="indented_code_location_finder_function_else_block",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            workflow_tags=["function"],
+            expected_code="""
+def location_finder(x):
+    if x == 'NY':
+        print("You are in NY")
+    elif x == 'PA':
+        print("You are in PA")
+""",
+        ),
+        prefix="""
+def location_finder(x):
+    if x == 'NY':
+        print("You are in NY")
+    elif x == 'PA':
+        print("You are""",
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
+        InlineCodeCompletionTestCase(
+        name="indented_code_location_finder_function_else_clause",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            workflow_tags=["function"],
+            expected_code="""
+def location_finder(x):
+    if x == 'NY':
+        print("You are in NY")
+    elif x == 'PA':
+        print("You are in PA")
+""",
+        ),
+        prefix="""
+def location_finder(x):
+    # Handle the NY Case
+    if x == 'NY':
+        print("You are in NY")
+    # Handle the PA Case
+    elif x""",
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
+    InlineCodeCompletionTestCase(
+        name="indented_code_location_finder_function_continue_after_else_clause",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            workflow_tags=["function"],
+            expected_code="""
+def location_finder(x):
+    if x == 'NY':
+        print("You are in NY")
+    elif x == 'PA':
+        print("You are in PA")
+""",
+        ),
+        prefix="""
+def location_finder(x):
+    # Handle the NY Case
+    if x == 'NY':
+        print("You are in NY")
+    # Handle the PA Case
+    elif x == 'PA':""",
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
     
     # NUMBER_OF_BMW_FORD_TOYOTA_FIRST_OWNER_FUNCTION
     InlineCodeCompletionTestCase(

From 229dac965bd3f109cbb8b7b17567c20c6fa834e0 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 09:54:39 -0500
Subject: [PATCH 17/27] mito-ai: improve handling of indented code

---
 .../Extensions/InlineCompleter/provider.ts    |  3 +++
 mito-ai/src/prompts/InlinePrompt.tsx          | 23 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/mito-ai/src/Extensions/InlineCompleter/provider.ts b/mito-ai/src/Extensions/InlineCompleter/provider.ts
index c8f13b2bf..d2ae16335 100644
--- a/mito-ai/src/Extensions/InlineCompleter/provider.ts
+++ b/mito-ai/src/Extensions/InlineCompleter/provider.ts
@@ -346,6 +346,9 @@ export class MitoAIInlineCompleter
   }
 
   private _cleanCompletion(rawCompletion: string, prefix?: string, suffix?: string) {
+
+
+    console.log('rawCompletion', rawCompletion)
     let cleanedCompletion = rawCompletion
       .replace(/^```python\n?/, '')  // Remove opening code fence with optional python language
       .replace(/```$/, '')           // Remove closing code fence
diff --git a/mito-ai/src/prompts/InlinePrompt.tsx b/mito-ai/src/prompts/InlinePrompt.tsx
index db27695a3..b8984cd42 100644
--- a/mito-ai/src/prompts/InlinePrompt.tsx
+++ b/mito-ai/src/prompts/InlinePrompt.tsx
@@ -12,7 +12,7 @@ You're given the current code cell, the user's cursor position, and the variable
 CRITICAL FORMATTING RULES:
 1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
 2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
-3. Your response must preserve correct Python indentation and spacing
+3. Your response must preserve correct Python indentation and spacing. For example, if you're completing a line of indented code, you must preserve the indentation.
 
 Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.
 
@@ -105,6 +105,27 @@ Output:
 
 IMPORTANT: Notice in Example 4 that the output is empty becuase the user's intent is already complete.
 
+<Example 5>
+Defined Variables: {{}}
+
+Code in the active code cell:
+\`\`\`python
+def even_and_odd():
+    for i in range(10):
+        if i % 2 == 0:
+            print(f"Even: {i}")
+        else:
+            pri<cursor>
+\`\`\`
+
+Output:
+\`\`\`python
+            print(f"Odd: {i}")
+\`\`\`
+</Example 5>
+
+IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.
+
 Your Task:
 
 Defined Variables: 

From bf97f7e8b2df111fe4184a1e1e3f0eb239df3131 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 09:55:32 -0500
Subject: [PATCH 18/27] mito-ai: cleanup

---
 mito-ai/src/Extensions/InlineCompleter/provider.ts | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/mito-ai/src/Extensions/InlineCompleter/provider.ts b/mito-ai/src/Extensions/InlineCompleter/provider.ts
index d2ae16335..38ce9355c 100644
--- a/mito-ai/src/Extensions/InlineCompleter/provider.ts
+++ b/mito-ai/src/Extensions/InlineCompleter/provider.ts
@@ -208,7 +208,6 @@ export class MitoAIInlineCompleter
         );
       }
 
-      console.log('calling _cleanCompletion from fetch')
       return {
         items: result.items.map(item => ({
           ...item,
@@ -328,7 +327,6 @@ export class MitoAIInlineCompleter
     fullCompletion += chunk.chunk.content;
     this._fullCompletionMap.set(this._currentStream, fullCompletion);
 
-    console.log('calling _cleanCompletion from stream')
     let cleanedCompletion = this._cleanCompletion(fullCompletion);
 
     this._currentStream.emit({
@@ -347,26 +345,21 @@ export class MitoAIInlineCompleter
 
   private _cleanCompletion(rawCompletion: string, prefix?: string, suffix?: string) {
 
-
-    console.log('rawCompletion', rawCompletion)
     let cleanedCompletion = rawCompletion
       .replace(/^```python\n?/, '')  // Remove opening code fence with optional python language
       .replace(/```$/, '')           // Remove closing code fence
       .replace(/\n$/, '')    
-      
-    console.log('prefix', prefix)
-    console.log('suffix', suffix)
 
+    // Remove duplicate prefix content
     if (prefix) {
-      // Remove duplicate prefix content
       const lastPrefixLine = prefix.split('\n').slice(-1)[0];
       if (cleanedCompletion.startsWith(lastPrefixLine) && lastPrefixLine !== '') {
         cleanedCompletion = cleanedCompletion.slice(lastPrefixLine.length);
       }
     }
 
+    // Remove duplicate suffix content
     if (suffix) {
-      // Remove duplicate suffix content
       const firstSuffixLine = suffix.split('\n')[0];
       if (cleanedCompletion.endsWith(firstSuffixLine) && firstSuffixLine !== '') {
         cleanedCompletion = cleanedCompletion.slice(0, -firstSuffixLine.length);

From 92db0c747fe7b4c39e003cc2a8ed1142c50df31a Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 10:20:22 -0500
Subject: [PATCH 19/27] tests: fix inline completer test for fetch

---
 .../mitoai_ui_tests/aiInlineCompleter.spec.ts | 40 +++++++++++--------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
index 2c6180ef0..f73820516 100644
--- a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
+++ b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
@@ -91,18 +91,11 @@ test.describe("default inline completion", () => {
         const messageId = payload.number;
         if (
           payload.type === "inline_completion" &&
-          payload.messages.find((message) => message.content.includes("def fib")) &&
-          payload.stream
+          payload.messages.find((message) => message.content.includes("print")) &&
+          payload.stream == false
         ) {
-          let counter = -1;
-          const streamReply = setInterval(() => {
-            if (++counter < MOCKED_MESSAGES.length) {
-              ws.send(JSON.stringify(MOCKED_MESSAGES[counter]));
-            } else {
-              clearInterval(streamReply);
-              replyDone.resolve();
-            }
-          }, 100);
+          // Send the fetch message back to the client
+          ws.send(JSON.stringify(MOCKED_FETCH_MESSAGE));
         } else {
           ws.send(
             JSON.stringify({
@@ -127,21 +120,21 @@ test.describe("default inline completion", () => {
     // before it grabs the text content.
     await (await page.notebook.getCellLocator(0))!
       .getByRole("textbox")
-      .fill("def fib");
+      .fill("print('hel");
 
     await replyDone.promise;
 
     expect.soft(page.locator(GHOST_SELECTOR)).toHaveCount(1);
     expect
       .soft((await page.notebook.getCellLocator(0))!.getByRole("textbox"))
-      .toHaveText("def fib(n):\n    pass\n");
+      .toHaveText("print('hello')");
 
     await page.keyboard.press("Tab");
 
     expect.soft(page.locator(GHOST_SELECTOR)).toHaveCount(0);
     expect(
       (await page.notebook.getCellLocator(0))!.getByRole("textbox")
-    ).toHaveText("def fib(n):\n    pass\n");
+    ).toHaveText("print('hello')");
   });
 });
 
@@ -184,8 +177,8 @@ test.describe("default manual inline completion", () => {
         ) {
           let counter = -1;
           const streamReply = setInterval(() => {
-            if (++counter < MOCKED_MESSAGES.length) {
-              ws.send(JSON.stringify(MOCKED_MESSAGES[counter]));
+            if (++counter < MOCKED_STREAM_MESSAGES.length) {
+              ws.send(JSON.stringify(MOCKED_STREAM_MESSAGES[counter]));
             } else {
               clearInterval(streamReply);
               replyDone.resolve();
@@ -231,8 +224,21 @@ test.describe("default manual inline completion", () => {
   });
 });
 
+const MOCKED_FETCH_MESSAGE = {
+  error: null,
+  items: [
+    {
+      content: "```python\nprint('hello')\n```",
+      isIncomplete: false,
+      token: null,
+    },
+  ],
+  parent_id: "2",
+  type: "reply",
+};
+
 // Mocked messages to simulate the inline completion process
-const MOCKED_MESSAGES = [
+const MOCKED_STREAM_MESSAGES = [
   {
     items: [
       {

From edabb4f2b925c43f7682b0a783f6fcc577f74203 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 10:32:18 -0500
Subject: [PATCH 20/27] .github: bump upload artifacts for mito-ai-backend

---
 .github/workflows/test-mito-ai-backend.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-mito-ai-backend.yml b/.github/workflows/test-mito-ai-backend.yml
index 55a44acf4..f709517a7 100644
--- a/.github/workflows/test-mito-ai-backend.yml
+++ b/.github/workflows/test-mito-ai-backend.yml
@@ -46,7 +46,7 @@ jobs:
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
     - name: Upload test-results
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       if: failure()
       with:
         name: mitoai-backend-report-${{ matrix.python-version }}

From e164f24e0830ddaf26e611636602456386ea0397 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 10:50:11 -0500
Subject: [PATCH 21/27] .github: update upload name for v4

---
 .github/workflows/prerelease-tests.yml        | 4 ++--
 .github/workflows/test-mito-ai-backend.yml    | 2 +-
 .github/workflows/test-mito-ai.yml            | 2 +-
 .github/workflows/test-mitosheet-frontend.yml | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/prerelease-tests.yml b/.github/workflows/prerelease-tests.yml
index fcae389c9..7e1b0e848 100644
--- a/.github/workflows/prerelease-tests.yml
+++ b/.github/workflows/prerelease-tests.yml
@@ -54,7 +54,7 @@ jobs:
           uses: actions/upload-artifact@v4
           if: failure()
           with:
-            name: jupyterlab-playwright-report-${{ matrix.python-version }}
+            name: jupyterlab-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
             path: tests/playwright-report/
             retention-days: 14    
 
@@ -157,7 +157,7 @@ jobs:
         uses: actions/upload-artifact@v4
         if: always()
         with:
-          name: streamlit-playwright-report
+          name: streamlit-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
           path: tests/playwright-report/
           retention-days: 14
     
diff --git a/.github/workflows/test-mito-ai-backend.yml b/.github/workflows/test-mito-ai-backend.yml
index f709517a7..78a688f8c 100644
--- a/.github/workflows/test-mito-ai-backend.yml
+++ b/.github/workflows/test-mito-ai-backend.yml
@@ -49,6 +49,6 @@ jobs:
       uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: mitoai-backend-report-${{ matrix.python-version }}
+        name: mitoai-backend-report-${{ matrix.python-version }}-${{ github.run_id }}
         path: mito-ai/tests/pytest-report/
         retention-days: 14
diff --git a/.github/workflows/test-mito-ai.yml b/.github/workflows/test-mito-ai.yml
index 412a41500..af8ae9251 100644
--- a/.github/workflows/test-mito-ai.yml
+++ b/.github/workflows/test-mito-ai.yml
@@ -62,6 +62,6 @@ jobs:
       uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: mitoai-jupyterlab-playwright-report-${{ matrix.python-version }}
+        name: mitoai-jupyterlab-playwright-report-${{ matrix.python-version }}-${{ matrix.use-mito-ai-server }}-${{ github.run_id }}
         path: tests/playwright-report/
         retention-days: 14
\ No newline at end of file
diff --git a/.github/workflows/test-mitosheet-frontend.yml b/.github/workflows/test-mitosheet-frontend.yml
index 667dbb247..2d7e1c1ac 100644
--- a/.github/workflows/test-mitosheet-frontend.yml
+++ b/.github/workflows/test-mitosheet-frontend.yml
@@ -60,7 +60,7 @@ jobs:
       uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: jupyterlab-playwright-report-${{ matrix.python-version }}
+        name: jupyterlab-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
         path: tests/playwright-report/
         retention-days: 14
 
@@ -111,7 +111,7 @@ jobs:
       uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: jupyternotebook-playwright-report-${{ matrix.python-version }}
+        name: jupyternotebook-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
         path: tests/playwright-report/
         retention-days: 14
 

From 695f8ecef3969166a0ee4be90abe5bd00f7c6362 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 11:13:31 -0500
Subject: [PATCH 22/27] tests: fix mito-ai test for non-streaming

---
 .../mitoai_ui_tests/aiInlineCompleter.spec.ts | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
index f73820516..542a638f9 100644
--- a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
+++ b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
@@ -81,7 +81,7 @@ test.describe("default inline completion", () => {
     },
   });
 
-  test("should display inline completion", async ({ page, tmpPath }) => {
+  test.only("should display inline completion", async ({ page, tmpPath }) => {
     const replyDone = new PromiseDelegate<void>();
     // Mock completion request with code prefix 'def fib'
     await page.routeWebSocket(/.*\/mito-ai\/completions/, (ws) => {
@@ -89,13 +89,15 @@ test.describe("default inline completion", () => {
       ws.onMessage((message) => {
         const payload = JSON.parse(message as string);
         const messageId = payload.number;
+        console.log(payload)
         if (
           payload.type === "inline_completion" &&
           payload.messages.find((message) => message.content.includes("print")) &&
           payload.stream == false
         ) {
           // Send the fetch message back to the client
-          ws.send(JSON.stringify(MOCKED_FETCH_MESSAGE));
+          ws.send(JSON.stringify(MOCKED_FETCH_RESULT));
+          replyDone.resolve();
         } else {
           ws.send(
             JSON.stringify({
@@ -123,8 +125,8 @@ test.describe("default inline completion", () => {
       .fill("print('hel");
 
     await replyDone.promise;
-
     expect.soft(page.locator(GHOST_SELECTOR)).toHaveCount(1);
+
     expect
       .soft((await page.notebook.getCellLocator(0))!.getByRole("textbox"))
       .toHaveText("print('hello')");
@@ -132,6 +134,7 @@ test.describe("default inline completion", () => {
     await page.keyboard.press("Tab");
 
     expect.soft(page.locator(GHOST_SELECTOR)).toHaveCount(0);
+
     expect(
       (await page.notebook.getCellLocator(0))!.getByRole("textbox")
     ).toHaveText("print('hello')");
@@ -224,17 +227,17 @@ test.describe("default manual inline completion", () => {
   });
 });
 
-const MOCKED_FETCH_MESSAGE = {
-  error: null,
-  items: [
-    {
-      content: "```python\nprint('hello')\n```",
-      isIncomplete: false,
-      token: null,
-    },
-  ],
-  parent_id: "2",
+const MOCKED_FETCH_RESULT = {
+  items: [{
+    content: "```python\nprint('hello')```",
+    error: null,
+    insertText: "lo')",
+    isIncomplete: false,  
+    token: null
+  }],
+  parent_id: "1",
   type: "reply",
+  error: null,
 };
 
 // Mocked messages to simulate the inline completion process

From 4652dfa89a1893e88a2916f76c3024dbfa7f3469 Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 11:18:37 -0500
Subject: [PATCH 23/27] tests: remove .only

---
 tests/mitoai_ui_tests/aiInlineCompleter.spec.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
index 542a638f9..4739f2c42 100644
--- a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
+++ b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
@@ -81,7 +81,7 @@ test.describe("default inline completion", () => {
     },
   });
 
-  test.only("should display inline completion", async ({ page, tmpPath }) => {
+  test("should display inline completion", async ({ page, tmpPath }) => {
     const replyDone = new PromiseDelegate<void>();
     // Mock completion request with code prefix 'def fib'
     await page.routeWebSocket(/.*\/mito-ai\/completions/, (ws) => {

From dc930b185d025aa7ccd3968d7d4a4757ebd40e8f Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Thu, 16 Jan 2025 11:26:34 -0500
Subject: [PATCH 24/27] tests: cleanup tests

---
 tests/mitoai_ui_tests/aiInlineCompleter.spec.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
index 4739f2c42..5bba69b88 100644
--- a/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
+++ b/tests/mitoai_ui_tests/aiInlineCompleter.spec.ts
@@ -85,11 +85,9 @@ test.describe("default inline completion", () => {
     const replyDone = new PromiseDelegate<void>();
     // Mock completion request with code prefix 'def fib'
     await page.routeWebSocket(/.*\/mito-ai\/completions/, (ws) => {
-      console.log("Mocking inline completion request");
       ws.onMessage((message) => {
         const payload = JSON.parse(message as string);
         const messageId = payload.number;
-        console.log(payload)
         if (
           payload.type === "inline_completion" &&
           payload.messages.find((message) => message.content.includes("print")) &&

From d3f9b733b69301c94462cf446c773395f173eb8b Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Fri, 17 Jan 2025 12:22:00 -0500
Subject: [PATCH 25/27] evals: add new evals for handling newlines

---
 .../misc_tests.py                             | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/evals/test_cases/inline_code_completion_tests/misc_tests.py b/evals/test_cases/inline_code_completion_tests/misc_tests.py
index ec4567a10..0382c4f98 100644
--- a/evals/test_cases/inline_code_completion_tests/misc_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/misc_tests.py
@@ -52,4 +52,95 @@
         type_tags=["comment_following"],
     ),
 
+    InlineCodeCompletionTestCase(
+        name="finish_today_variable_with_equals",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+import datetime
+
+# Get today's date with just the date component using datetime.datetime.today().date()
+today_date = datetime.datetime.today().date()
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+import datetime
+
+# Get today's date with just the date component using datetime.datetime.today().date()
+today_date = """,
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
+    InlineCodeCompletionTestCase(
+        name="finish_today_variable_without_equals",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+import datetime
+
+# Get today's date with just the date component using datetime.datetime.today().date()
+today_date = datetime.datetime.today().date()
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+import datetime
+
+# Get today's date with just the date component using datetime.datetime.today().date()
+today_date """,
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
+    InlineCodeCompletionTestCase(
+        name="print_after_15th_cursor_at_end_of_comment",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+import datetime
+
+today_date = datetime.datetime.today().date()
+
+# If today is after the 15th of the month print 'After 15th'
+if today_date.day > 15:
+    print('After 15th')
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+import datetime
+
+today_date = datetime.datetime.today().date()
+
+# If today is after the 15th of the month print 'After 15th'""",
+        suffix="""""",
+        type_tags=["comment_following"],
+    ),
+            InlineCodeCompletionTestCase(
+        name="print_after_2pm_cursor_after_comment",
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+import datetime
+
+today_date = datetime.datetime.today().date()
+
+# If today is after the 15th of the month print 'After 15th'
+if today_date.day > 15:
+    print('After 15th')
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+import datetime
+
+today_date = datetime.datetime.today().date()
+
+# If today is after the 15th of the month print 'After 15th'
+""",
+        suffix="""""",
+        type_tags=["comment_following"],
+    ),
 ]

From dea05260ebd7e88aec2efdff0be0147339888acb Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Fri, 17 Jan 2025 12:30:59 -0500
Subject: [PATCH 26/27] evals: add more variable declaration evals

---
 .../misc_tests.py                             | 100 ++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/evals/test_cases/inline_code_completion_tests/misc_tests.py b/evals/test_cases/inline_code_completion_tests/misc_tests.py
index 0382c4f98..0da285196 100644
--- a/evals/test_cases/inline_code_completion_tests/misc_tests.py
+++ b/evals/test_cases/inline_code_completion_tests/misc_tests.py
@@ -94,6 +94,106 @@
         type_tags=["code_completion"],
     ),
 
+    InlineCodeCompletionTestCase(
+        name='finish_total_variable_before_equals',
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum = x + y + z
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum""",
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
+        InlineCodeCompletionTestCase(
+        name='finish_total_variable_before_equals_with_space',
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum = x + y + z
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum """,
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
+    InlineCodeCompletionTestCase(
+        name='finish_total_variable_at_equals',
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum = x + y + z
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum =""",
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
+    InlineCodeCompletionTestCase(
+        name='finish_total_variable_after_equals_space',
+        test_case_core=CodeGenTestCaseCore(
+            notebook_state=EMPTY_NOTEBOOK,
+            expected_code="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum = x + y + z
+""",
+            workflow_tags=["misc"],
+        ),
+        prefix="""
+x = 10
+y = 20
+z = 30
+
+# Sum of x, y, and z
+total_sum = """,
+        suffix="""""",
+        type_tags=["code_completion"],
+    ),
+
     InlineCodeCompletionTestCase(
         name="print_after_15th_cursor_at_end_of_comment",
         test_case_core=CodeGenTestCaseCore(

From 89d53c13f94d59602df1a8972769e6816335e97e Mon Sep 17 00:00:00 2001
From: Aaron Diamond-Reivich <aarondr77@gmail.com>
Date: Fri, 17 Jan 2025 13:07:36 -0500
Subject: [PATCH 27/27] mito-ai: improve prompt to prevent extra new lines

---
 .../prod_prompt_v4.py                           | 17 +++++++++++++++++
 mito-ai/src/prompts/InlinePrompt.tsx            | 16 ++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
index 0b0d5c01e..738409bf1 100644
--- a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
+++ b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
@@ -132,6 +132,23 @@ def even_and_odd():
 
 IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.
 
+<Example 6>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+days_in_week <cursor>
+```
+
+Output:
+```python
+days_in_week = 7
+```
+</Example 6>
+
+IMPORTANT: Notice in Example 6 that in order to finish the variable declaration, the output continues the existing line of code and does not start with a new line character.
+
+
 Your Task:
 
 Defined Variables: {notebook_state.global_vars}
diff --git a/mito-ai/src/prompts/InlinePrompt.tsx b/mito-ai/src/prompts/InlinePrompt.tsx
index b8984cd42..bacc5b85a 100644
--- a/mito-ai/src/prompts/InlinePrompt.tsx
+++ b/mito-ai/src/prompts/InlinePrompt.tsx
@@ -126,6 +126,22 @@ Output:
 
 IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.
 
+<Example 6>
+Defined Variables: {{}}
+
+Code in the active code cell:
+\`\`\`python
+days_in_week <cursor>
+\`\`\`
+
+Output:
+\`\`\`python
+days_in_week = 7
+\`\`\`
+</Example 6>
+
+IMPORTANT: Notice in Example 6 that in order to finish the variable declaration, the output continues the existing line of code and does not start with a new line character.
+
 Your Task:
 
 Defined Variables: