diff --git a/src/crewai/cli/cli.py b/src/crewai/cli/cli.py
index 7b025a5679..c9f03f3fbc 100644
--- a/src/crewai/cli/cli.py
+++ b/src/crewai/cli/cli.py
@@ -5,11 +5,11 @@
     KickoffTaskOutputsSQLiteStorage,
 )
 
-
 from .create_crew import create_crew
-from .train_crew import train_crew
 from .replay_from_task import replay_task_command
 from .reset_memories_command import reset_memories_command
+from .test_crew import test_crew
+from .train_crew import train_crew
 
 
 @click.group()
@@ -126,5 +126,26 @@ def reset_memories(long, short, entities, kickoff_outputs, all):
         click.echo(f"An error occurred while resetting memories: {e}", err=True)
 
 
+@crewai.command()
+@click.option(
+    "-n",
+    "--n_iterations",
+    type=int,
+    default=3,
+    help="Number of iterations to Test the crew",
+)
+@click.option(
+    "-m",
+    "--model",
+    type=str,
+    default="gpt-4o-mini",
+    help="LLM Model to run the tests on the Crew. For now only accepting OpenAI models.",
+)
+def test(n_iterations: int, model: str):
+    """Test the crew and evaluate the results."""
+    click.echo(f"Testing the crew for {n_iterations} iterations with model {model}")
+    test_crew(n_iterations, model)
+
+
 if __name__ == "__main__":
     crewai()
diff --git a/src/crewai/cli/templates/main.py b/src/crewai/cli/templates/main.py
index 2ec1cc6b09..f300f49120 100644
--- a/src/crewai/cli/templates/main.py
+++ b/src/crewai/cli/templates/main.py
@@ -39,3 +39,16 @@ def replay():
 
     except Exception as e:
         raise Exception(f"An error occurred while replaying the crew: {e}")
+
+def test():
+    """
+    Test the crew execution using the iteration count and model given on the command line.
+    """
+    inputs = {
+        "topic": "AI LLMs"
+    }
+    try:
+        {{crew_name}}Crew().crew().test(n_iterations=int(sys.argv[1]), model=sys.argv[2], inputs=inputs)
+
+    except Exception as e:
+        raise Exception(f"An error occurred while testing the crew: {e}")
diff --git a/src/crewai/cli/templates/pyproject.toml b/src/crewai/cli/templates/pyproject.toml
index 12fcffa3b7..33781e14db 100644
--- a/src/crewai/cli/templates/pyproject.toml
+++ b/src/crewai/cli/templates/pyproject.toml
@@ -12,6 +12,7 @@ crewai = { extras = ["tools"], version = "^0.41.1" }
 {{folder_name}} = "{{folder_name}}.main:run"
 train = "{{folder_name}}.main:train"
 replay = "{{folder_name}}.main:replay"
+test = "{{folder_name}}.main:test"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/src/crewai/cli/test_crew.py b/src/crewai/cli/test_crew.py
new file mode 100644
index 0000000000..6743b70048
--- /dev/null
+++ b/src/crewai/cli/test_crew.py
@@ -0,0 +1,29 @@
+import subprocess
+
+import click
+
+
+def test_crew(n_iterations: int, model: str) -> None:
+    """
+    Test the crew by running a command in the Poetry environment.
+
+    Args:
+        n_iterations (int): The number of iterations to test the crew.
+        model (str): The model to test the crew with.
+    """
+    command = ["poetry", "run", "test", str(n_iterations), model]
+
+    try:
+        if n_iterations <= 0:
+            raise ValueError("The number of iterations must be a positive integer.")
+
+        # Output is not captured, so the child process writes straight to the
+        # terminal and there is no stderr on the result to echo afterwards.
+        subprocess.run(command, capture_output=False, text=True, check=True)
+
+    except subprocess.CalledProcessError as e:
+        click.echo(f"An error occurred while testing the crew: {e}", err=True)
+        click.echo(e.output, err=True)
+
+    except Exception as e:
+        click.echo(f"An unexpected error occurred: {e}", err=True)
diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index fc31a67744..e286b47f99 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -966,5 +966,15 @@ def calculate_usage_metrics(self) -> Dict[str, int]:
 
         return total_usage_metrics
 
+    def test(
+        self, n_iterations: int, model: str, inputs: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Test the crew with the given inputs.
+
+        Placeholder: the evaluation logic is not implemented yet, so calling
+        this method currently does nothing.
+        """
+        pass
+
     def __repr__(self):
         return f"Crew(id={self.id}, process={self.process}, number_of_agents={len(self.agents)}, number_of_tasks={len(self.tasks)})"
diff --git a/tests/cli/cli_test.py b/tests/cli/cli_test.py
index f877f913a8..504975dc7f 100644
--- a/tests/cli/cli_test.py
+++ b/tests/cli/cli_test.py
@@ -3,7 +3,7 @@
 import pytest
 from click.testing import CliRunner
 
-from crewai.cli.cli import train, version, reset_memories
+from crewai.cli.cli import reset_memories, test, train, version
 
 
 @pytest.fixture
@@ -133,3 +133,33 @@ def test_version_command_with_tools(runner):
         "crewai tools version:" in result.output
         or "crewai tools not installed" in result.output
     )
+
+
+@mock.patch("crewai.cli.cli.test_crew")
+def test_test_default_iterations(test_crew, runner):
+    result = runner.invoke(test)
+
+    test_crew.assert_called_once_with(3, "gpt-4o-mini")
+    assert result.exit_code == 0
+    assert "Testing the crew for 3 iterations with model gpt-4o-mini" in result.output
+
+
+@mock.patch("crewai.cli.cli.test_crew")
+def test_test_custom_iterations(test_crew, runner):
+    result = runner.invoke(test, ["--n_iterations", "5", "--model", "gpt-4o"])
+
+    test_crew.assert_called_once_with(5, "gpt-4o")
+    assert result.exit_code == 0
+    assert "Testing the crew for 5 iterations with model gpt-4o" in result.output
+
+
+@mock.patch("crewai.cli.cli.test_crew")
+def test_test_invalid_string_iterations(test_crew, runner):
+    result = runner.invoke(test, ["--n_iterations", "invalid"])
+
+    test_crew.assert_not_called()
+    assert result.exit_code == 2
+    assert (
+        "Usage: test [OPTIONS]\nTry 'test --help' for help.\n\nError: Invalid value for '-n' / '--n_iterations': 'invalid' is not a valid integer.\n"
+        in result.output
+    )
diff --git a/tests/cli/test_crew_test.py b/tests/cli/test_crew_test.py
new file mode 100644
index 0000000000..90649710a1
--- /dev/null
+++ b/tests/cli/test_crew_test.py
@@ -0,0 +1,97 @@
+import subprocess
+from unittest import mock
+
+import pytest
+
+from crewai.cli import test_crew
+
+
+@pytest.mark.parametrize(
+    "n_iterations,model",
+    [
+        (1, "gpt-4o"),
+        (5, "gpt-3.5-turbo"),
+        (10, "gpt-4"),
+    ],
+)
+@mock.patch("crewai.cli.test_crew.subprocess.run")
+def test_crew_success(mock_subprocess_run, n_iterations, model):
+    """Test the crew function for successful execution."""
+    mock_subprocess_run.return_value = subprocess.CompletedProcess(
+        args=f"poetry run test {n_iterations} {model}", returncode=0
+    )
+    result = test_crew.test_crew(n_iterations, model)
+
+    mock_subprocess_run.assert_called_once_with(
+        ["poetry", "run", "test", str(n_iterations), model],
+        capture_output=False,
+        text=True,
+        check=True,
+    )
+    assert result is None
+
+
+@mock.patch("crewai.cli.test_crew.click")
+def test_test_crew_zero_iterations(click):
+    test_crew.test_crew(0, "gpt-4o")
+    click.echo.assert_called_once_with(
+        "An unexpected error occurred: The number of iterations must be a positive integer.",
+        err=True,
+    )
+
+
+@mock.patch("crewai.cli.test_crew.click")
+def test_test_crew_negative_iterations(click):
+    test_crew.test_crew(-2, "gpt-4o")
+    click.echo.assert_called_once_with(
+        "An unexpected error occurred: The number of iterations must be a positive integer.",
+        err=True,
+    )
+
+
+@mock.patch("crewai.cli.test_crew.click")
+@mock.patch("crewai.cli.test_crew.subprocess.run")
+def test_test_crew_called_process_error(mock_subprocess_run, click):
+    n_iterations = 5
+    mock_subprocess_run.side_effect = subprocess.CalledProcessError(
+        returncode=1,
+        cmd=["poetry", "run", "test", str(n_iterations), "gpt-4o"],
+        output="Error",
+        stderr="Some error occurred",
+    )
+    test_crew.test_crew(n_iterations, "gpt-4o")
+
+    mock_subprocess_run.assert_called_once_with(
+        ["poetry", "run", "test", "5", "gpt-4o"],
+        capture_output=False,
+        text=True,
+        check=True,
+    )
+    click.echo.assert_has_calls(
+        [
+            mock.call(
+                "An error occurred while testing the crew: Command '['poetry', 'run', 'test', '5', 'gpt-4o']' returned non-zero exit status 1.",
+                err=True,
+            ),
+            mock.call("Error", err=True),
+        ]
+    )
+
+
+@mock.patch("crewai.cli.test_crew.click")
+@mock.patch("crewai.cli.test_crew.subprocess.run")
+def test_test_crew_unexpected_exception(mock_subprocess_run, click):
+    # Arrange
+    n_iterations = 5
+    mock_subprocess_run.side_effect = Exception("Unexpected error")
+    test_crew.test_crew(n_iterations, "gpt-4o")
+
+    mock_subprocess_run.assert_called_once_with(
+        ["poetry", "run", "test", "5", "gpt-4o"],
+        capture_output=False,
+        text=True,
+        check=True,
+    )
+    click.echo.assert_called_once_with(
+        "An unexpected error occurred: Unexpected error", err=True
+    )