AntonOsika · viborc · Aug 8, 2024 · Jul 23, 2024 · Jul 23, 2024 · Jul 23, 2024
diff --git a/gpt_engineer/applications/cli/cli_agent.py b/gpt_engineer/applications/cli/cli_agent.py
@@ -109,6 +109,7 @@ def with_default_config(
         improve_fn: ImproveType = improve_fn,
         process_code_fn: CodeProcessor = execute_entrypoint,
         preprompts_holder: PrepromptsHolder = None,
+        diff_timeout=3,
     ):
         """
         Creates a new instance of CliAgent with default configurations for memory, execution environment,
@@ -186,6 +187,7 @@ def improve(
         files_dict: FilesDict,
         prompt: Prompt,
         execution_command: Optional[str] = None,
+        diff_timeout=3,
     ) -> FilesDict:
         """
         Improves an existing piece of code using the AI and step bundle based on the provided prompt.
@@ -206,7 +208,12 @@ def improve(
         """
 
         files_dict = self.improve_fn(
-            self.ai, prompt, files_dict, self.memory, self.preprompts_holder
+            self.ai,
+            prompt,
+            files_dict,
+            self.memory,
+            self.preprompts_holder,
+            diff_timeout=diff_timeout,
         )
         # entrypoint = gen_entrypoint(
         #     self.ai, prompt, files_dict, self.memory, self.preprompts_holder

diff --git a/gpt_engineer/applications/cli/file_selector.py b/gpt_engineer/applications/cli/file_selector.py
@@ -76,7 +76,7 @@ def __init__(self, project_path: Union[str, Path]):
         self.metadata_db = DiskMemory(metadata_path(self.project_path))
         self.toml_path = self.metadata_db.path / self.FILE_LIST_NAME
 
-    def ask_for_files(self) -> tuple[FilesDict, bool]:
+    def ask_for_files(self, skip_file_selection=False) -> tuple[FilesDict, bool]:
         """
         Prompts the user to select files for context improvement.
 
@@ -89,8 +89,9 @@ def ask_for_files(self) -> tuple[FilesDict, bool]:
             A dictionary with file paths as keys and file contents as values.
         """
 
-        if os.getenv("GPTE_TEST_MODE"):
+        if os.getenv("GPTE_TEST_MODE") or skip_file_selection:
             # In test mode, retrieve files from a predefined TOML configuration
+            # also read from the TOML when skip_file_selection is active
             assert self.FILE_LIST_NAME in self.metadata_db
             selected_files = self.get_files_from_toml(self.project_path, self.toml_path)
         else:
@@ -412,7 +413,7 @@ def get_current_files(self, project_path: Union[str, Path]) -> List[str]:
         if is_git_repo(project_path) and "projects" not in project_path.parts:
             all_files = filter_by_gitignore(project_path, all_files)
 
-        return all_files
+        return sorted(all_files, key=lambda x: Path(x).as_posix())
 
 
 class DisplayablePath(object):

diff --git a/gpt_engineer/applications/cli/main.py b/gpt_engineer/applications/cli/main.py
@@ -357,6 +357,12 @@ def main(
         "--use_cache",
         help="Speeds up computations and saves tokens when running the same prompt multiple times by caching the LLM response.",
     ),
+    skip_file_selection: bool = typer.Option(
+        False,
+        "--skip-file-selection",
+        "-s",
+        help="Skip interactive file selection in improve mode and use the generated TOML file directly.",
+    ),
     no_execution: bool = typer.Option(
         False,
         "--no_execution",
@@ -367,6 +373,11 @@ def main(
         "--sysinfo",
         help="Output system information for debugging",
     ),
+    diff_timeout: int = typer.Option(
+        3,
+        "--diff_timeout",
+        help="Diff regexp timeout. Default: 3. Increase if regexp search timeouts.",
+    ),
 ):
     """
     The main entry point for the CLI tool that generates or improves a project.
@@ -405,6 +416,8 @@ def main(
         Speeds up computations and saves tokens when running the same prompt multiple times by caching the LLM response.
     verbose : bool
         Flag indicating whether to enable verbose logging.
+    skip_file_selection: bool
+        Skip interactive file selection in improve mode and use the generated TOML file directly
     no_execution: bool
         Run setup but to not call LLM or write any code. For testing purposes.
     sysinfo: bool
@@ -501,13 +514,17 @@ def main(
     files = FileStore(project_path)
     if not no_execution:
         if improve_mode:
-            files_dict_before, is_linting = FileSelector(project_path).ask_for_files()
+            files_dict_before, is_linting = FileSelector(project_path).ask_for_files(
+                skip_file_selection=skip_file_selection
+            )
 
             # lint the code
             if is_linting:
                 files_dict_before = files.linting(files_dict_before)
 
-            files_dict = handle_improve_mode(prompt, agent, memory, files_dict_before)
+            files_dict = handle_improve_mode(
+                prompt, agent, memory, files_dict_before, diff_timeout=diff_timeout
+            )
             if not files_dict or files_dict_before == files_dict:
                 print(
                     f"No changes applied. Could you please upload the debug_log_file.txt in {memory.path}/logs folder in a github issue?"

diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py
@@ -120,7 +120,7 @@ def apply_diffs(diffs: Dict[str, Diff], files: FilesDict) -> FilesDict:
     return files
 
 
-def parse_diffs(diff_string: str) -> dict:
+def parse_diffs(diff_string: str, diff_timeout=3) -> dict:
     """
     Parses a diff string in the unified git diff format.
 
@@ -138,7 +138,7 @@ def parse_diffs(diff_string: str) -> dict:
 
     diffs = {}
     try:
-        for block in diff_block_pattern.finditer(diff_string, timeout=1):
+        for block in diff_block_pattern.finditer(diff_string, timeout=diff_timeout):
             diff_block = block.group()
 
             # Parse individual diff blocks and update the diffs dictionary

diff --git a/gpt_engineer/core/default/steps.py b/gpt_engineer/core/default/steps.py
@@ -274,6 +274,7 @@ def improve_fn(
     files_dict: FilesDict,
     memory: BaseMemory,
     preprompts_holder: PrepromptsHolder,
+    diff_timeout=3,
 ) -> FilesDict:
     """
     Improves the code based on user input and returns the updated files.
@@ -308,14 +309,16 @@ def improve_fn(
         DEBUG_LOG_FILE,
         "UPLOADED FILES:\n" + files_dict.to_log() + "\nPROMPT:\n" + prompt.text,
     )
-    return _improve_loop(ai, files_dict, memory, messages)
+    return _improve_loop(ai, files_dict, memory, messages, diff_timeout=diff_timeout)
 
 
 def _improve_loop(
-    ai: AI, files_dict: FilesDict, memory: BaseMemory, messages: List
+    ai: AI, files_dict: FilesDict, memory: BaseMemory, messages: List, diff_timeout=3
 ) -> FilesDict:
     messages = ai.next(messages, step_name=curr_fn())
-    files_dict, errors = salvage_correct_hunks(messages, files_dict, memory)
+    files_dict, errors = salvage_correct_hunks(
+        messages, files_dict, memory, diff_timeout=diff_timeout
+    )
 
     retries = 0
     while errors and retries < MAX_EDIT_REFINEMENT_STEPS:
@@ -327,21 +330,21 @@ def _improve_loop(
             )
         )
         messages = ai.next(messages, step_name=curr_fn())
-        files_dict, errors = salvage_correct_hunks(messages, files_dict, memory)
+        files_dict, errors = salvage_correct_hunks(
+            messages, files_dict, memory, diff_timeout
+        )
         retries += 1
 
     return files_dict
 
 
 def salvage_correct_hunks(
-    messages: List,
-    files_dict: FilesDict,
-    memory: BaseMemory,
+    messages: List, files_dict: FilesDict, memory: BaseMemory, diff_timeout=3
 ) -> tuple[FilesDict, List[str]]:
     error_messages = []
     ai_response = messages[-1].content.strip()
 
-    diffs = parse_diffs(ai_response)
+    diffs = parse_diffs(ai_response, diff_timeout=diff_timeout)
     # validate and correct diffs
 
     for _, diff in diffs.items():
@@ -370,13 +373,13 @@ def flush(self):
             file.flush()
 
 
-def handle_improve_mode(prompt, agent, memory, files_dict):
+def handle_improve_mode(prompt, agent, memory, files_dict, diff_timeout=3):
     captured_output = io.StringIO()
     old_stdout = sys.stdout
     sys.stdout = Tee(sys.stdout, captured_output)
 
     try:
-        files_dict = agent.improve(files_dict, prompt)
+        files_dict = agent.improve(files_dict, prompt, diff_timeout=diff_timeout)
     except Exception as e:
         print(
             f"Error while improving the project: {e}\nCould you please upload the debug_log_file.txt in {memory.path}/logs folder to github?\nFULL STACK TRACE:\n"

diff --git a/gpt_engineer/tools/custom_steps.py b/gpt_engineer/tools/custom_steps.py
@@ -44,6 +44,7 @@ def self_heal(
     prompt: Prompt = None,
     preprompts_holder: PrepromptsHolder = None,
     memory: BaseMemory = None,
+    diff_timeout=3,
 ) -> FilesDict:
     """
     Attempts to execute the code from the entrypoint and if it fails, sends the error output back to the AI with instructions to fix.
@@ -111,7 +112,7 @@ def self_heal(
                 f"A program with this specification was requested:\n{prompt}\n, but running it produced the following output:\n{stdout_full}\n and the following errors:\n{stderr_full}. Please change it so that it fulfills the requirements."
             )
             files_dict = improve_fn(
-                ai, new_prompt, files_dict, memory, preprompts_holder
+                ai, new_prompt, files_dict, memory, preprompts_holder, diff_timeout
             )
         else:
             break

diff --git a/gpt_engineer/tools/supported_languages.py b/gpt_engineer/tools/supported_languages.py
@@ -46,7 +46,9 @@
         "extensions": [".cpp", ".cc", ".cxx", ".h", ".hpp", ".hxx"],
         "tree_sitter_name": "cpp",
     },
-    {"name": "C", "extensions": [".c", ".h"], "tree_sitter_name": "c"}
+    {"name": "C", "extensions": [".c", ".h"], "tree_sitter_name": "c"},
+    {"name": "Markdown", "extensions": [".md"], "tree_sitter_name": "md"},
+    {"name": "Arduino C", "extensions": [".ino"], "tree_sitter_name": "ino"}
     # ---- the following are not supported by the current code chunker implementation ----
     # {
     #     "name": "Swift",

diff --git a/tests/applications/cli/test_main.py b/tests/applications/cli/test_main.py
@@ -92,6 +92,36 @@ def test_improve_existing_project(self, tmp_path, monkeypatch):
         )
         args()
 
+    #  Runs gpt-engineer with improve mode and improves an existing project in the specified path, with skip_file_selection
+    def test_improve_existing_project_skip_file_selection(self, tmp_path, monkeypatch):
+        p = tmp_path / "projects/example"
+        p.mkdir(parents=True)
+        (p / "prompt").write_text(prompt_text)
+        args = DefaultArgumentsMain(
+            str(p),
+            improve_mode=True,
+            llm_via_clipboard=True,
+            no_execution=True,  # NOTE(review): main() bypasses file selection
+            skip_file_selection=True,
+        )
+        args()
+        assert args.skip_file_selection, "Skip_file_selection not set"
+
+    #  Runs gpt-engineer with improve mode and improves an existing project in the specified path, with a custom diff_timeout
+    def test_improve_existing_project_diff_timeout(self, tmp_path, monkeypatch):
+        p = tmp_path / "projects/example"
+        p.mkdir(parents=True)
+        (p / "prompt").write_text(prompt_text)
+        args = DefaultArgumentsMain(
+            str(p),
+            improve_mode=True,
+            llm_via_clipboard=True,
+            no_execution=True,  # NOTE(review): main() bypasses the improve step
+            diff_timeout=99,  # non-default value; the CLI option defaults to 3
+        )
+        args()
+        assert args.diff_timeout == 99, "Diff timeout not set"
+
         # def improve_generator():
         #     yield "y"
         #     while True:

diff --git a/tests/core/test_file_selector_enhancements.py b/tests/core/test_file_selector_enhancements.py
@@ -0,0 +1,79 @@
+"""Tests for the FileSelector skip_file_selection option and sorted file listing."""
+
+import os
+
+from pathlib import Path
+from typing import List, Union
+
+from gpt_engineer.applications.cli.file_selector import FileSelector
+
+# Flipped to True by set_editor_called; lets a test detect that the editor ran.
+editorcalled = False
+
+
+def set_editor_called(
+    self, input_path: Union[str, Path], init: bool = True
+) -> List[str]:
+    """Stub for editor_file_selector: record the call and return no files."""
+    global editorcalled
+    editorcalled = True
+    return []
+
+
+def set_file_selector_tmpproject(tmp_path):
+    """
+    Create a small project tree under tmp_path and return the project path.
+
+    The tree holds x/xxtest.py and a/aatest.py plus a .gpteng/file_selection.toml
+    that marks both files as selected.
+    """
+    project_path = tmp_path / "project/"
+    os.mkdir(project_path)
+    os.mkdir(project_path / "x")
+    os.mkdir(project_path / "a")
+
+    gpteng_path = project_path / ".gpteng"
+    os.mkdir(gpteng_path)
+
+    with open(gpteng_path / "file_selection.toml", "w") as file:
+        file.write("[files]\n")
+        file.write(' "x/xxtest.py" = "selected"\n')
+        file.write(' "a/aatest.py" = "selected"\n')
+
+    with open(project_path / "x/xxtest.py", "w") as file:
+        file.write('print("Hello")')
+
+    with open(project_path / "a/aatest.py", "w") as file:
+        file.write('print("Hello")')
+
+    return project_path
+
+
+def test_file_selector_enhancement_skip_file_selector(tmp_path):
+    """ask_for_files(skip_file_selection=True) must not call the editor selector."""
+    global editorcalled
+    # Reset the flag so the result does not depend on import-time state or on
+    # another test having triggered the stub earlier in the session.
+    editorcalled = False
+
+    project_path = set_file_selector_tmpproject(tmp_path)
+    file_selector = FileSelector(project_path=project_path)
+    # NOTE(review): assigned on the instance, so the stub is not bound and its
+    # `self` would receive the first positional argument if it were ever called.
+    file_selector.editor_file_selector = set_editor_called
+    file_selector.ask_for_files(skip_file_selection=True)
+
+    assert editorcalled is False, "FileSelector.skip_file_selection is not working"
+
+
+def test_file_selector_enhancement_sort(tmp_path):
+    """get_current_files must list project files in sorted path order."""
+    project_path = set_file_selector_tmpproject(tmp_path)
+    file_selector = FileSelector(project_path=project_path)
+
+    # "x" is created before "a" by the fixture helper; the expected list is sorted.
+    sorted_files = file_selector.get_current_files(project_path)
+    assert sorted_files == [
+        "a/aatest.py",
+        "x/xxtest.py",
+    ], "FileSelector.get_current_files is unsorted!"