From 1e7f2bd12f7d5158a4d360762c99676d695e4218 Mon Sep 17 00:00:00 2001 From: Mateus Perfigao Domiciano Date: Tue, 8 Jul 2025 20:01:14 -0300 Subject: [PATCH 1/2] Refactor CodeQL analysis to use create_and_analyze method and enhance error handling for corrupted databases --- .../use_cases/codeql_analysis_use_case.py | 40 ++++++------------- .../infrastructure/codeql_runner.py | 31 +++++++++++--- tests/test_codeql_analysis.py | 9 ++--- tests/test_codeql_runner.py | 27 ++++++++----- 4 files changed, 61 insertions(+), 46 deletions(-) diff --git a/src/codeql_wrapper/domain/use_cases/codeql_analysis_use_case.py b/src/codeql_wrapper/domain/use_cases/codeql_analysis_use_case.py index 726bd60..a1eac63 100644 --- a/src/codeql_wrapper/domain/use_cases/codeql_analysis_use_case.py +++ b/src/codeql_wrapper/domain/use_cases/codeql_analysis_use_case.py @@ -270,31 +270,12 @@ def _analyze_project( for language in project.languages: self._logger.debug(f"Running CodeQL analysis for {language.value}") - # Create database - db_path = output_dir / f"db-{language.value}" + # Create database and run analysis using the robust method if self._codeql_runner is None: raise Exception("CodeQL runner not initialized") - db_result = self._codeql_runner.create_database( - database_path=str(db_path), - source_root=str(project.path), - language=language.value, - overwrite=True, - ) - - if not db_result.success: - error_msg = ( - f"Failed to create database for {language.value}: " - f"{db_result.stderr}" - ) - self._logger.error(error_msg) - result.status = AnalysisStatus.FAILED - result.error_message = error_msg - result.end_time = datetime.now() - return result - - # Run analysis - output_format = "sarif-latest" # Default output format + # Default output format + output_format = "sarif-latest" # Map output formats to conventional file extensions format_to_extension = { @@ -309,15 +290,20 @@ def _analyze_project( file_extension = format_to_extension.get(output_format, ".sarif") output_file = output_dir / f"results-{language.value}{file_extension}" - analysis_result = self._codeql_runner.analyze_database( - database_path=str(db_path), - output_format=output_format, - output=str(output_file), + + # Use the robust create_and_analyze method that handles corrupted databases + analysis_result = self._codeql_runner.create_and_analyze( + source_root=str(project.path), + language=language.value, + output_file=str(output_file), + database_name=str(output_dir / f"db-{language.value}"), + cleanup_database=False, # Keep database for debugging ) if not analysis_result.success: error_msg = ( - f"Failed to analyze {language.value}: {analysis_result.stderr}" + f"Failed to create database and analyze {language.value}: " + f"{analysis_result.stderr}" ) self._logger.error(error_msg) result.status = AnalysisStatus.FAILED diff --git a/src/codeql_wrapper/infrastructure/codeql_runner.py b/src/codeql_wrapper/infrastructure/codeql_runner.py index c1f2af4..498c89b 100644 --- a/src/codeql_wrapper/infrastructure/codeql_runner.py +++ b/src/codeql_wrapper/infrastructure/codeql_runner.py @@ -1,6 +1,7 @@ """CodeQL runner infrastructure module.""" import subprocess +import shutil from pathlib import Path from typing import List, Optional from dataclasses import dataclass @@ -355,8 +356,31 @@ def create_and_analyze( ) if not create_result.success: - self.logger.error(f"Database creation failed: {create_result.stderr}") - return create_result + # Check if this is the specific corrupted database error + if ("Unrecognized file in database cluster" in create_result.stderr or + "does not appear to be a CodeQL database" in create_result.stderr): + self.logger.warning( + "Detected corrupted database directory, removing and retrying..." + ) + + # Remove the corrupted directory + if database_path.exists(): + try: + shutil.rmtree(database_path, ignore_errors=True) + self.logger.info(f"Removed corrupted database directory: {database_path}") + except Exception as e: + self.logger.error(f"Failed to remove corrupted directory: {e}") + return create_result + + # Retry database creation + self.logger.info("Retrying database creation...") + create_result = self.create_database( + str(database_path), source_root, language, build_command, overwrite=True + ) + + if not create_result.success: + self.logger.error(f"Database creation failed: {create_result.stderr}") + return create_result self.logger.info("Database created successfully") @@ -381,6 +405,3 @@ def create_and_analyze( # Cleanup database if requested if cleanup_database and database_path.exists(): self.logger.info(f"Cleaning up database at {database_path}") - import shutil - - shutil.rmtree(database_path, ignore_errors=True) diff --git a/tests/test_codeql_analysis.py b/tests/test_codeql_analysis.py index 2d56081..0b3e478 100644 --- a/tests/test_codeql_analysis.py +++ b/tests/test_codeql_analysis.py @@ -398,8 +398,7 @@ def test_analyze_project_full_workflow(self) -> None: # Mock CodeQL runner and all necessary methods mock_runner = Mock() - mock_runner.create_database.return_value = Mock(success=True) - mock_runner.analyze_database.return_value = Mock(success=True) + mock_runner.create_and_analyze.return_value = Mock(success=True) self.use_case._codeql_runner = mock_runner @@ -444,8 +443,8 @@ def test_analyze_project_failure(self) -> None: # Mock CodeQL runner to fail mock_runner = Mock() - mock_runner.create_database.return_value = Mock( - success=False, stderr="Database creation failed" + mock_runner.create_and_analyze.return_value = Mock( + success=False, stderr="Database creation and analysis failed" ) self.use_case._codeql_runner = mock_runner @@ -459,7 +458,7 @@ def test_analyze_project_failure(self) -> None: assert result.findings_count == 0 assert ( result.error_message is not None - and "Database creation failed" in result.error_message + and "Database creation and analysis failed" in result.error_message ) self.use_case._codeql_runner = mock_runner diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index 16d3839..7720146 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -511,19 +511,28 @@ def test_create_and_analyze_analysis_fails( ) assert result.success is False + assert "Analysis failed" in result.stderr assert result.stderr == "Analysis failed" - assert mock_run.call_count == 2 @patch("subprocess.run") - def test_create_and_analyze_with_custom_database_path(self, mock_run) -> None: + @patch("pathlib.Path.exists") + def test_create_and_analyze_with_custom_database_path(self, mock_exists, mock_run) -> None: """Test create and analyze with custom database path.""" - # Mock successful database creation and analysis - success_result = Mock() - success_result.returncode = 0 - success_result.stdout = "Success" - success_result.stderr = "" + # Mock that database doesn't exist + mock_exists.return_value = False + + # Mock successful create and analyze + create_result = Mock() + create_result.returncode = 0 + create_result.stdout = "Database created" + create_result.stderr = "" - mock_run.return_value = success_result + analyze_result = Mock() + analyze_result.returncode = 0 + analyze_result.stdout = "Analysis complete" + analyze_result.stderr = "" + + mock_run.side_effect = [create_result, analyze_result] result = self.runner.create_and_analyze( "/source", @@ -534,7 +543,7 @@ def test_create_and_analyze_with_custom_database_path(self, mock_run) -> None: ) assert result.success is True - assert mock_run.call_count == 2 + assert mock_run.call_count == 2 # create, analyze # Check that the custom database path was used create_call_args = mock_run.call_args_list[0][0][0] From 8a0b0083a27e0b2917f5f7c1ff8c5a7a0476761d Mon Sep 17 00:00:00 2001 From: Mateus Perfigao Domiciano Date: Tue, 8 Jul 2025 20:08:01 -0300 Subject: [PATCH 2/2] fix lint --- .../infrastructure/codeql_runner.py | 24 ++++++++++++++----- tests/test_codeql_runner.py | 6 +++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/codeql_wrapper/infrastructure/codeql_runner.py b/src/codeql_wrapper/infrastructure/codeql_runner.py index 498c89b..d0e9fb5 100644 --- a/src/codeql_wrapper/infrastructure/codeql_runner.py +++ b/src/codeql_wrapper/infrastructure/codeql_runner.py @@ -357,8 +357,10 @@ def create_and_analyze( if not create_result.success: # Check if this is the specific corrupted database error - if ("Unrecognized file in database cluster" in create_result.stderr or - "does not appear to be a CodeQL database" in create_result.stderr): + if ( + "Unrecognized file in database cluster" in create_result.stderr + or "does not appear to be a CodeQL database" in create_result.stderr + ): self.logger.warning( "Detected corrupted database directory, removing and retrying..." ) @@ -367,19 +369,29 @@ def create_and_analyze( if database_path.exists(): try: shutil.rmtree(database_path, ignore_errors=True) - self.logger.info(f"Removed corrupted database directory: {database_path}") + self.logger.info( + f"Removed corrupted database directory: {database_path}" + ) except Exception as e: - self.logger.error(f"Failed to remove corrupted directory: {e}") + self.logger.error( + f"Failed to remove corrupted directory: {e}" + ) return create_result # Retry database creation self.logger.info("Retrying database creation...") create_result = self.create_database( - str(database_path), source_root, language, build_command, overwrite=True + str(database_path), + source_root, + language, + build_command, + overwrite=True, ) if not create_result.success: - self.logger.error(f"Database creation failed: {create_result.stderr}") + self.logger.error( + f"Database creation failed: {create_result.stderr}" + ) return create_result self.logger.info("Database created successfully") diff --git a/tests/test_codeql_runner.py b/tests/test_codeql_runner.py index 7720146..686dbf4 100644 --- a/tests/test_codeql_runner.py +++ b/tests/test_codeql_runner.py @@ -516,11 +516,13 @@ def test_create_and_analyze_analysis_fails( @patch("subprocess.run") @patch("pathlib.Path.exists") - def test_create_and_analyze_with_custom_database_path(self, mock_exists, mock_run) -> None: + def test_create_and_analyze_with_custom_database_path( + self, mock_exists, mock_run + ) -> None: """Test create and analyze with custom database path.""" # Mock that database doesn't exist mock_exists.return_value = False - + # Mock successful create and analyze create_result = Mock() create_result.returncode = 0