Merge pull request #1483 from mito-ds/add-prefix-autocomplete-evals

Do not repeat prefix and suffix in autocomplete
mito-ds · Jan 19, 2025 · f0e6fb5 · f0e6fb5
2 parents 987fe02 + 89d53c1
commit f0e6fb5
Show file tree

Hide file tree

Showing 18 changed files with 792 additions and 50 deletions.
diff --git a/.github/workflows/prerelease-tests.yml b/.github/workflows/prerelease-tests.yml
@@ -54,7 +54,7 @@ jobs:
           uses: actions/upload-artifact@v4
           if: failure()
           with:
-            name: jupyterlab-playwright-report-${{ matrix.python-version }}
+            name: jupyterlab-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
             path: tests/playwright-report/
             retention-days: 14    
 
@@ -157,7 +157,7 @@ jobs:
         uses: actions/upload-artifact@v4
         if: always()
         with:
-          name: streamlit-playwright-report
+          name: streamlit-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
           path: tests/playwright-report/
           retention-days: 14
 

diff --git a/.github/workflows/test-mito-ai-backend.yml b/.github/workflows/test-mito-ai-backend.yml
@@ -46,9 +46,9 @@ jobs:
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
     - name: Upload test-results
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: mitoai-backend-report-${{ matrix.python-version }}
+        name: mitoai-backend-report-${{ matrix.python-version }}-${{ github.run_id }}
         path: mito-ai/tests/pytest-report/
         retention-days: 14
diff --git a/.github/workflows/test-mito-ai.yml b/.github/workflows/test-mito-ai.yml
@@ -67,6 +67,6 @@ jobs:
       uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: mitoai-jupyterlab-playwright-report-${{ matrix.python-version }}
+        name: mitoai-jupyterlab-playwright-report-${{ matrix.python-version }}-${{ matrix.use-mito-ai-server }}-${{ github.run_id }}
         path: tests/playwright-report/
         retention-days: 14
diff --git a/.github/workflows/test-mitosheet-frontend.yml b/.github/workflows/test-mitosheet-frontend.yml
@@ -62,7 +62,7 @@ jobs:
       uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: jupyterlab-playwright-report-${{ matrix.python-version }}
+        name: jupyterlab-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
         path: tests/playwright-report/
         retention-days: 14
 
@@ -113,7 +113,7 @@ jobs:
       uses: actions/upload-artifact@v4
       if: failure()
       with:
-        name: jupyternotebook-playwright-report-${{ matrix.python-version }}
+        name: jupyternotebook-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
         path: tests/playwright-report/
         retention-days: 14
 

diff --git a/evals/eval_types.py b/evals/eval_types.py
@@ -114,10 +114,15 @@ class InlineCodeCompletionPromptGenerator():
 
     def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) -> str:
+        # Interface stub: always raises here; each concrete generator supplies its own prompt text.
         raise NotImplementedError("Subclasses must implement this method")
+
+    def post_process_output(self, output: str, prefix: str, suffix: str) -> str:
+        # Default implementation returns the output unchanged; prefix and suffix are
+        # accepted but not used here. Subclasses may override this to clean up the output.
+        return output
 
 class DebugPromptGenerator():
+    # Base interface for debug prompt generators: get_prompt takes an error message
+    # and a notebook state and is expected to return the prompt as a string.
 
+    # Annotation only; no value is assigned in this class.
     prompt_name: str
 
     def get_prompt(self, error_message: str, notebook_state: NotebookState) -> str:
+        # Interface stub: always raises until a subclass overrides it.
         raise NotImplementedError("Subclasses must implement this method")
+
diff --git a/evals/notebook_states.py b/evals/notebook_states.py
@@ -170,4 +170,63 @@
 }
 
 """, '']
+)
+
+
+# Fixture table of 15 NBA players with four columns: player_name, team, position
+# and points_per_game. The same literal is repeated as cell source in NBA_PLAYERS_NOTEBOOK.
+nba_players_df = pd.DataFrame({
+    'player_name': [
+        'LeBron James', 'Kevin Durant', 'Stephen Curry', 'Giannis Antetokounmpo',
+        'Kawhi Leonard', 'James Harden', 'Luka Dončić', 'Damian Lillard',
+        'Joel Embiid', 'Nikola Jokić', 'Anthony Davis', 'Chris Paul',
+        'Jayson Tatum', 'Zion Williamson', 'Devin Booker'
+    ],
+    'team': [
+        'Los Angeles Lakers', 'Brooklyn Nets', 'Golden State Warriors', 'Milwaukee Bucks',
+        'Los Angeles Clippers', 'Brooklyn Nets', 'Dallas Mavericks', 'Portland Trail Blazers',
+        'Philadelphia 76ers', 'Denver Nuggets', 'Los Angeles Lakers', 'Phoenix Suns',
+        'Boston Celtics', 'New Orleans Pelicans', 'Phoenix Suns'
+    ],
+    'position': [
+        'SF', 'PF', 'PG', 'PF',
+        'SF', 'SG', 'PG', 'PG',
+        'C', 'C', 'PF', 'PG',
+        'SF', 'PF', 'SG'
+    ],
+    'points_per_game': [
+        25.0, 27.0, 29.0, 28.0,
+        24.0, 25.0, 28.0, 27.0,
+        33.0, 26.0, 22.0, 18.0,
+        26.0, 22.0, 25.0
+    ]
+})
+
+# Notebook state for the NBA players fixture. global_vars carries only the first five
+# rows (head(5)) of nba_players_df, while the first cell_contents entry is source code
+# that builds all 15 rows; the second cell_contents entry is an empty string.
+NBA_PLAYERS_NOTEBOOK: NotebookState = NotebookState(
+    global_vars={'nba_players_df': nba_players_df.head(5)},
+    cell_contents=["""import pandas as pd
+nba_players_df = pd.DataFrame({
+    'player_name': [
+        'LeBron James', 'Kevin Durant', 'Stephen Curry', 'Giannis Antetokounmpo',
+        'Kawhi Leonard', 'James Harden', 'Luka Dončić', 'Damian Lillard',
+        'Joel Embiid', 'Nikola Jokić', 'Anthony Davis', 'Chris Paul',
+        'Jayson Tatum', 'Zion Williamson', 'Devin Booker'
+    ],
+    'team': [
+        'Los Angeles Lakers', 'Brooklyn Nets', 'Golden State Warriors', 'Milwaukee Bucks',
+        'Los Angeles Clippers', 'Brooklyn Nets', 'Dallas Mavericks', 'Portland Trail Blazers',
+        'Philadelphia 76ers', 'Denver Nuggets', 'Los Angeles Lakers', 'Phoenix Suns',
+        'Boston Celtics', 'New Orleans Pelicans', 'Phoenix Suns'
+    ],
+    'position': [
+        'SF', 'PF', 'PG', 'PF',
+        'SF', 'SG', 'PG', 'PG',
+        'C', 'C', 'PF', 'PG',
+        'SF', 'PF', 'SG'
+    ],
+    'points_per_game': [
+        25.0, 27.0, 29.0, 28.0,
+        24.0, 25.0, 28.0, 27.0,
+        33.0, 26.0, 22.0, 18.0,
+        26.0, 22.0, 25.0
+    ]
+})""", '']
 )
diff --git a/evals/prompts/inline_code_completion_prompts/__init__.py b/evals/prompts/inline_code_completion_prompts/__init__.py
@@ -2,10 +2,12 @@
 from evals.prompts.inline_code_completion_prompts.prod_prompt_v1 import prod_prompt_v1
 from evals.prompts.inline_code_completion_prompts.prod_prompt_v2 import prod_prompt_v2
 from evals.prompts.inline_code_completion_prompts.prod_prompt_v3 import prod_prompt_v3
+from evals.prompts.inline_code_completion_prompts.prod_prompt_v4 import prod_prompt_v4
 
 
 INLINE_CODE_COMPLETION_PROMPT_GENERATORS = [
+    # One imported generator object per prompt version, listed in version order.
     prod_prompt_v1,
     prod_prompt_v2,
     prod_prompt_v3,
+    prod_prompt_v4,
 ]
diff --git a/evals/prompts/inline_code_completion_prompts/prod_prompt_v3.py b/evals/prompts/inline_code_completion_prompts/prod_prompt_v3.py
@@ -56,12 +56,12 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
 
 Code in the active code cell:
 ```python
-df['age'] = df[df['age'] > 23<cursor>]
+df['age'] = df[<cursor>['age'] > 23]
 ```
 
 Output:
 ```python
-]
+df
 ```
 </Example 2>
 
@@ -77,6 +77,8 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
 
 Output:
 ```python
+
+x=1
 ```
 </Example 3>
 

diff --git a/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py b/evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
@@ -0,0 +1,180 @@
+from evals.eval_types import InlineCodeCompletionPromptGenerator, NotebookState, ChatPromptGenerator
+
+__all__ = ['prod_prompt_v4']
+
+# This prompt makes one big strategy change: instead of asking the AI to fill in only the gap between the prefix and the suffix,
+# it asks the AI to return the full line of code that matches the user's intent. We then post-process the AI's output
+# so that it does not repeat the last line of the prefix or the first line of the suffix.
+
+class _ProdPromptV4(InlineCodeCompletionPromptGenerator):
+    prompt_name = "prod_prompt_v4"
+
+    def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) -> str:
+        # Builds the prompt as a single f-string: fixed instructions, six worked examples,
+        # then the task section, which interpolates notebook_state.global_vars and shows the
+        # active cell as {prefix}<cursor>{suffix}. Doubled braces in the examples are f-string
+        # escapes that render as literal { and }.
+        # NOTE(review): the prompt text contains typos ("wasnts", "becuase", "inorder") and the
+        # IMPORTANT note for Example 3 sits inside its closing tag, unlike the other examples.
+        # Left as-is here because editing the text changes what the model is sent.
+
+        return f"""You are a coding assistant that lives inside of JupyterLab. Your job is to help the user write code. 
+
+You're given the current code cell, the user's cursor position, and the variables defined in the notebook. The user's cursor is signified by the symbol <cursor>.
+
+CRITICAL FORMATTING RULES:
+1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
+2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
+3. Your response must preserve correct Python indentation and spacing. For example, if you're completing a line of indented code, you must preserve the indentation.
+
+Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.
+
+<Example 1>
+Defined Variables: {{
+    'loan_multiplier': 1.5,
+    'sales_df': pd.DataFrame({{
+        'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
+        'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
+        'units_sold': [1, 2, 1, 4, 5],
+        'total_price': [10, 19.98, 13.99, 84.00, 500]
+    }})
+}}
+
+Code in the active code cell:
+```python
+import pandas as pd
+sales_df = pd.read_csv('./sales.csv')
+
+# Multiply the total_price column by the loan_multiplier<cursor>
+```
+
+Output:
+```python
+
+sales_df['total_price'] = sales_df['total_price'] * loan_multiplier
+```
+</Example 1>
+
+IMPORTANT: Notice in Example 1 that the output starts with a newline because the cursor was at the end of a comment. This newline is REQUIRED to maintain proper Python formatting.
+
+<Example 2>
+Defined Variables: {{
+    df: pd.DataFrame({{
+        'age': [20, 25, 22, 23, 29],
+        'name': ['Nawaz', 'Aaron', 'Charlie', 'Tamir', 'Eve'],
+    }})
+}}
+
+Code in the active code cell:
+```python
+df['age'] = df[<cursor>['age'] > 23]
+```
+
+Output:
+```python
+df['age'] = df[df['age'] > 23]
+```
+</Example 2>
+
+IMPORTANT: Notice in Example 2 that the output does NOT start with a newline because the cursor is in the middle of existing code.
+
+<Example 3>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+voters = pd.read_csv('./voters.csv')
+
+# Create a variable for pennsylvania voters, ohio voters, california voters, and texas voters
+pa_voters = voters[voters['state'] == 'PA']
+ohio_voters<cursor>
+```
+
+Output:
+```python
+ohio_voters = voters[voters['state'] == 'OH']
+ca_voters = voters[voters['state'] == 'CA']
+tx_voters = voters[voters['state'] == 'TX']
+```
+
+IMPORTANT: Notice in Example 3 that output does not start with a newline character because it wasnts to continue the line of code that the user started. Also notice the output contains three lines of code because that is the minimal code to achieve the user's intent.
+
+</Example 3>
+
+<Example 4>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+# Display the first 5 rows of the dataframe
+df.head()
+<cursor>
+```
+
+Output:
+```python
+```
+</Example 4>
+
+IMPORTANT: Notice in Example 4 that the output is empty becuase the user's intent is already complete.
+
+<Example 5>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+def even_and_odd():
+    for i in range(10):
+        if i % 2 == 0:
+            print(f"Even")
+        else:
+            pri<cursor>
+```
+
+Output:
+```python
+            print(f"Odd")
+```
+</Example 5>
+
+IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.
+
+<Example 6>
+Defined Variables: {{}}
+
+Code in the active code cell:
+```python
+days_in_week <cursor>
+```
+
+Output:
+```python
+days_in_week = 7
+```
+</Example 6>
+
+IMPORTANT: Notice in Example 6 that inorder to finish the variable declaration, the output continues the existing line of code and does not start with a new line character.
+
+
+Your Task:
+
+Defined Variables: {notebook_state.global_vars}
+
+Code in the active code cell:
+```python
+{prefix}<cursor>{suffix}
+```
+
+Output:
+"""
+
+    def post_process_output(self, output: str, prefix: str, suffix: str) -> str:
+        # The prompt asks for the whole line, so the output may echo the text around the cursor.
+        # Text on the cursor line before the cursor: cut it from the front of the output.
+        line_before_cursor = prefix.rpartition("\n")[2]
+        if line_before_cursor and output.startswith(line_before_cursor):
+            output = output[len(line_before_cursor):]
+
+        # Text on the cursor line after the cursor: cut it from the back of the output.
+        line_after_cursor = suffix.partition("\n")[0]
+        if line_after_cursor and output.endswith(line_after_cursor):
+            output = output[:len(output) - len(line_after_cursor)]
+
+        return output
+
+# Module-level instance of the generator; the only name listed in __all__.
+prod_prompt_v4 = _ProdPromptV4()
+
+