Skip to content

Commit

Permalink
Merge pull request #1483 from mito-ds/add-prefix-autocomplete-evals
Browse files Browse the repository at this point in the history
Do not repeat prefix and suffix in autocomplete
  • Loading branch information
aarondr77 authored Jan 19, 2025
2 parents 987fe02 + 89d53c1 commit f0e6fb5
Show file tree
Hide file tree
Showing 18 changed files with 792 additions and 50 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/prerelease-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
uses: actions/upload-artifact@v4
if: failure()
with:
name: jupyterlab-playwright-report-${{ matrix.python-version }}
name: jupyterlab-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
path: tests/playwright-report/
retention-days: 14

Expand Down Expand Up @@ -157,7 +157,7 @@ jobs:
uses: actions/upload-artifact@v4
if: always()
with:
name: streamlit-playwright-report
name: streamlit-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
path: tests/playwright-report/
retention-days: 14

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-mito-ai-backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- name: Upload test-results
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
if: failure()
with:
name: mitoai-backend-report-${{ matrix.python-version }}
name: mitoai-backend-report-${{ matrix.python-version }}-${{ github.run_id }}
path: mito-ai/tests/pytest-report/
retention-days: 14
2 changes: 1 addition & 1 deletion .github/workflows/test-mito-ai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,6 @@ jobs:
uses: actions/upload-artifact@v4
if: failure()
with:
name: mitoai-jupyterlab-playwright-report-${{ matrix.python-version }}
name: mitoai-jupyterlab-playwright-report-${{ matrix.python-version }}-${{ matrix.use-mito-ai-server }}-${{ github.run_id }}
path: tests/playwright-report/
retention-days: 14
4 changes: 2 additions & 2 deletions .github/workflows/test-mitosheet-frontend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
uses: actions/upload-artifact@v4
if: failure()
with:
name: jupyterlab-playwright-report-${{ matrix.python-version }}
name: jupyterlab-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
path: tests/playwright-report/
retention-days: 14

Expand Down Expand Up @@ -113,7 +113,7 @@ jobs:
uses: actions/upload-artifact@v4
if: failure()
with:
name: jupyternotebook-playwright-report-${{ matrix.python-version }}
name: jupyternotebook-playwright-report-${{ matrix.python-version }}-${{ github.run_id }}
path: tests/playwright-report/
retention-days: 14

Expand Down
5 changes: 5 additions & 0 deletions evals/eval_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,15 @@ class InlineCodeCompletionPromptGenerator():

def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) -> str:
    """Build the inline code completion prompt; must be overridden.

    Args:
        prefix: Code before the cursor.
        suffix: Code after the cursor.
        notebook_state: State of the notebook the completion runs in.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("Subclasses must implement this method")

def post_process_output(self, output: str, prefix: str, suffix: str) -> str:
    """Return the model output unchanged.

    This is the pass-through default: ``prefix`` and ``suffix`` are accepted
    but not used. Prompt generators that need to clean up the model output
    override this method.
    """
    return output

class DebugPromptGenerator:
    """Interface for objects that build a debugging prompt."""

    # Name of the prompt; declared here without a value.
    prompt_name: str

    def get_prompt(self, error_message: str, notebook_state: NotebookState) -> str:
        """Build the prompt for ``error_message``; must be overridden.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement this method")

59 changes: 59 additions & 0 deletions evals/notebook_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,63 @@
}
""", '']
)


# Sample data for the NBA players notebook state: 15 rows with four columns
# (player_name, team, position, points_per_game).
# NOTE: the same literal is repeated as source text inside
# NBA_PLAYERS_NOTEBOOK's cell_contents; keep the two in sync when editing.
nba_players_df = pd.DataFrame({
'player_name': [
'LeBron James', 'Kevin Durant', 'Stephen Curry', 'Giannis Antetokounmpo',
'Kawhi Leonard', 'James Harden', 'Luka Dončić', 'Damian Lillard',
'Joel Embiid', 'Nikola Jokić', 'Anthony Davis', 'Chris Paul',
'Jayson Tatum', 'Zion Williamson', 'Devin Booker'
],
'team': [
'Los Angeles Lakers', 'Brooklyn Nets', 'Golden State Warriors', 'Milwaukee Bucks',
'Los Angeles Clippers', 'Brooklyn Nets', 'Dallas Mavericks', 'Portland Trail Blazers',
'Philadelphia 76ers', 'Denver Nuggets', 'Los Angeles Lakers', 'Phoenix Suns',
'Boston Celtics', 'New Orleans Pelicans', 'Phoenix Suns'
],
'position': [
'SF', 'PF', 'PG', 'PF',
'SF', 'SG', 'PG', 'PG',
'C', 'C', 'PF', 'PG',
'SF', 'PF', 'SG'
],
'points_per_game': [
25.0, 27.0, 29.0, 28.0,
24.0, 25.0, 28.0, 27.0,
33.0, 26.0, 22.0, 18.0,
26.0, 22.0, 25.0
]
})

# Notebook state built around the NBA players data.
# - global_vars exposes only the first five rows of nba_players_df (head(5)),
#   while the cell source below defines all 15 rows.
# - cell_contents has two entries: the source that builds the DataFrame,
#   followed by an empty string (presumably the empty active cell; confirm).
NBA_PLAYERS_NOTEBOOK: NotebookState = NotebookState(
global_vars={'nba_players_df': nba_players_df.head(5)},
cell_contents=["""import pandas as pd
nba_players_df = pd.DataFrame({
'player_name': [
'LeBron James', 'Kevin Durant', 'Stephen Curry', 'Giannis Antetokounmpo',
'Kawhi Leonard', 'James Harden', 'Luka Dončić', 'Damian Lillard',
'Joel Embiid', 'Nikola Jokić', 'Anthony Davis', 'Chris Paul',
'Jayson Tatum', 'Zion Williamson', 'Devin Booker'
],
'team': [
'Los Angeles Lakers', 'Brooklyn Nets', 'Golden State Warriors', 'Milwaukee Bucks',
'Los Angeles Clippers', 'Brooklyn Nets', 'Dallas Mavericks', 'Portland Trail Blazers',
'Philadelphia 76ers', 'Denver Nuggets', 'Los Angeles Lakers', 'Phoenix Suns',
'Boston Celtics', 'New Orleans Pelicans', 'Phoenix Suns'
],
'position': [
'SF', 'PF', 'PG', 'PF',
'SF', 'SG', 'PG', 'PG',
'C', 'C', 'PF', 'PG',
'SF', 'PF', 'SG'
],
'points_per_game': [
25.0, 27.0, 29.0, 28.0,
24.0, 25.0, 28.0, 27.0,
33.0, 26.0, 22.0, 18.0,
26.0, 22.0, 25.0
]
})""", '']
)
2 changes: 2 additions & 0 deletions evals/prompts/inline_code_completion_prompts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
from evals.prompts.inline_code_completion_prompts.prod_prompt_v1 import prod_prompt_v1
from evals.prompts.inline_code_completion_prompts.prod_prompt_v2 import prod_prompt_v2
from evals.prompts.inline_code_completion_prompts.prod_prompt_v3 import prod_prompt_v3
from evals.prompts.inline_code_completion_prompts.prod_prompt_v4 import prod_prompt_v4


# Inline code completion prompt generators, listed in version order (v1 to v4).
# A new prompt version must be both imported above and appended here.
INLINE_CODE_COMPLETION_PROMPT_GENERATORS = [
prod_prompt_v1,
prod_prompt_v2,
prod_prompt_v3,
prod_prompt_v4,
]
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
Code in the active code cell:
```python
df['age'] = df[df['age'] > 23<cursor>]
df['age'] = df[<cursor>['age'] > 23]
```
Output:
```python
]
df
```
</Example 2>
Expand All @@ -77,6 +77,8 @@ def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) ->
Output:
```python
x=1
```
</Example 3>
Expand Down
180 changes: 180 additions & 0 deletions evals/prompts/inline_code_completion_prompts/prod_prompt_v4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
from evals.eval_types import InlineCodeCompletionPromptGenerator, NotebookState, ChatPromptGenerator

__all__ = ['prod_prompt_v4']

# This prompt makes one big strategy change: instead of asking the AI to fill in only the blank between the prefix and the suffix,
# it asks the AI to return the full line of code that matches the user's intent. We then post-process the AI's output
# so that it does not repeat the last line of the prefix or the first line of the suffix.

class _ProdPromptV4(InlineCodeCompletionPromptGenerator):
prompt_name = "prod_prompt_v4"

def get_prompt(self, prefix: str, suffix: str, notebook_state: NotebookState) -> str:
    """Build the inline-completion prompt for the current cursor position.

    The prompt asks the model for the full line(s) of code that match the
    user's intent, rather than only the text missing between ``prefix`` and
    ``suffix``. ``post_process_output`` then trims whatever the model
    repeats from the cursor's line.

    Args:
        prefix: Code in the active cell before the cursor.
        suffix: Code in the active cell after the cursor.
        notebook_state: Notebook snapshot; only ``global_vars`` is read and
            interpolated into the prompt.

    Returns:
        The prompt text, ending with ``Output:`` and a newline.
    """
    # Literal braces are doubled ({{ }}) because this is an f-string.
    # Whitespace inside the examples is significant: Example 1's output
    # starts with an empty line and Example 5's code and output are indented,
    # exactly as the IMPORTANT notes after those examples describe.
    return f"""You are a coding assistant that lives inside of JupyterLab. Your job is to help the user write code.
You're given the current code cell, the user's cursor position, and the variables defined in the notebook. The user's cursor is signified by the symbol <cursor>.
CRITICAL FORMATTING RULES:
1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
3. Your response must preserve correct Python indentation and spacing. For example, if you're completing a line of indented code, you must preserve the indentation.
Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.
<Example 1>
Defined Variables: {{
'loan_multiplier': 1.5,
'sales_df': pd.DataFrame({{
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
'units_sold': [1, 2, 1, 4, 5],
'total_price': [10, 19.98, 13.99, 84.00, 500]
}})
}}
Code in the active code cell:
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
# Multiply the total_price column by the loan_multiplier<cursor>
```
Output:
```python

sales_df['total_price'] = sales_df['total_price'] * loan_multiplier
```
</Example 1>
IMPORTANT: Notice in Example 1 that the output starts with a newline because the cursor was at the end of a comment. This newline is REQUIRED to maintain proper Python formatting.
<Example 2>
Defined Variables: {{
df: pd.DataFrame({{
'age': [20, 25, 22, 23, 29],
'name': ['Nawaz', 'Aaron', 'Charlie', 'Tamir', 'Eve'],
}})
}}
Code in the active code cell:
```python
df['age'] = df[<cursor>['age'] > 23]
```
Output:
```python
df['age'] = df[df['age'] > 23]
```
</Example 2>
IMPORTANT: Notice in Example 2 that the output does NOT start with a newline because the cursor is in the middle of existing code.
<Example 3>
Defined Variables: {{}}
Code in the active code cell:
```python
voters = pd.read_csv('./voters.csv')
# Create a variable for pennsylvania voters, ohio voters, california voters, and texas voters
pa_voters = voters[voters['state'] == 'PA']
ohio_voters<cursor>
```
Output:
```python
ohio_voters = voters[voters['state'] == 'OH']
ca_voters = voters[voters['state'] == 'CA']
tx_voters = voters[voters['state'] == 'TX']
```
IMPORTANT: Notice in Example 3 that output does not start with a newline character because it wants to continue the line of code that the user started. Also notice the output contains three lines of code because that is the minimal code to achieve the user's intent.
</Example 3>
<Example 4>
Defined Variables: {{}}
Code in the active code cell:
```python
# Display the first 5 rows of the dataframe
df.head()
<cursor>
```
Output:
```python
```
</Example 4>
IMPORTANT: Notice in Example 4 that the output is empty because the user's intent is already complete.
<Example 5>
Defined Variables: {{}}
Code in the active code cell:
```python
def even_and_odd():
    for i in range(10):
        if i % 2 == 0:
            print(f"Even")
        else:
            pri<cursor>
```
Output:
```python
            print(f"Odd")
```
</Example 5>
IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.
<Example 6>
Defined Variables: {{}}
Code in the active code cell:
```python
days_in_week <cursor>
```
Output:
```python
days_in_week = 7
```
</Example 6>
IMPORTANT: Notice in Example 6 that in order to finish the variable declaration, the output continues the existing line of code and does not start with a new line character.
Your Task:
Defined Variables: {notebook_state.global_vars}
Code in the active code cell:
```python
{prefix}<cursor>{suffix}
```
Output:
"""

def post_process_output(self, output: str, prefix: str, suffix: str) -> str:
    """Trim text the model repeated from the line the cursor is on.

    The prompt asks for the full line of code, so the output may begin with
    what is already left of the cursor and end with what is already right
    of it. Both overlaps are removed so that only new text is inserted.

    Args:
        output: Raw completion returned by the model.
        prefix: Code before the cursor; only its last line is considered.
        suffix: Code after the cursor; only its first line is considered.

    Returns:
        ``output`` without a leading copy of the prefix's last line and
        without a trailing copy of the suffix's first line.
    """
    # Text on the cursor's line that sits to the left of the cursor.
    before_cursor = prefix.rsplit("\n", 1)[-1]
    if before_cursor and output.startswith(before_cursor):
        output = output[len(before_cursor):]

    # Text on the cursor's line that sits to the right of the cursor.
    # Checked after the prefix trim, so it sees the already-shortened output.
    after_cursor = suffix.partition("\n")[0]
    if after_cursor and output.endswith(after_cursor):
        output = output[: len(output) - len(after_cursor)]

    return output

prod_prompt_v4 = _ProdPromptV4()


Loading

0 comments on commit f0e6fb5

Please sign in to comment.