added filter for hidden cicd files

xvnpw · Dec 10, 2024 · c770861 · c770861
1 parent d000df4
commit c770861
Show file tree

Hide file tree

Showing 10 changed files with 155 additions and 18 deletions.
diff --git a/README.md b/README.md
@@ -21,12 +21,12 @@
 
 ## Overview
 
-**AI Security Analyzer** is a Python-based tool that analyzes your project's codebase and automatically generates detailed security documentation. It supports multiple analysis types including security design documentation and threat modeling. The tool supports multiple project types and utilizes advanced language models (LLMs) to create insightful security documentation tailored to your project's specific needs.
+**AI Security Analyzer** is a Python-based tool that analyzes your project's codebase and automatically generates detailed security documentation. It supports multiple analysis types including security design documentation, threat modeling, attack surface analysis, and threat scenarios. The tool supports multiple project types and utilizes advanced language models (LLMs) to create insightful security documentation tailored to your project's specific needs.
 
 ## Features
 
 - 🔍 **Intelligent Code Analysis**: Automatically analyzes your project's codebase for security considerations.
-- 📝 **Automated Documentation Generation**: Generates comprehensive security design and threat modeling documents.
+- 📝 **Automated Documentation Generation**: Generates comprehensive security design, threat modeling, attack surface analysis, and threat scenarios documents.
 - 🔐 **Security-Focused Insights**: Provides detailed insights into potential security risks and design patterns.
 - 🔄 **Multi-Project Support**: Supports Python, Go project types.
 - 🤖 **Multiple LLM Provider Support**: Compatible with OpenAI, OpenRouter, and Anthropic models.
@@ -181,6 +181,8 @@ The application accepts various command-line arguments to tailor its behavior.
 - `--agent-prompt-type`: Prompt to use in agent (default: `sec-design`). Options are:
   - `sec-design`: Generate a security design document for the project.
   - `threat-modeling`: Perform threat modeling for the project.
+  - `attack-surface`: Perform attack surface analysis for the project.
+  - `threat-scenarios`: Perform threat scenarios analysis for the project using Daniel Miessler's [prompt](https://github.com/danielmiessler/fabric/blob/f5f50cc4c94a539ee56bc533e9b1194eb9aa424d/patterns/create_threat_scenarios/system.md).
 - `--files-context-window`: Maximum token size for LLM context window. Automatically determined if not set.
 - `--files-chunk-size`: Chunk size in tokens for splitting files. Automatically determined if not set.
 
@@ -261,13 +263,35 @@ poetry run python ai_security_analyzer/app.py \
 ```
 =========== dry-run ===========
 All documents token count: 123456
-List of chunked files to analyse:
+List of chunked files to analyze:
 src/main.py
 src/utils.py
 README.md
 ...
 ```
 
+## Real World Examples
+
+See the [examples](https://github.com/xvnpw/ai-security-analyzer/blob/main/examples/README.md) for real-world examples, such as the Flask framework and the requests library.
+
+## Supported Project Types
+
+- Python
+- Go
+- More to come...
+
+If your project type is not supported, use the `generic` project type together with the `--include`, `--include-mode`, `--exclude`, and `--exclude-mode` options.
+
+**Example:**
+
+```bash
+poetry run python ai_security_analyzer/app.py \
+    -t /path/to/your/project \
+    -o security_design.md \
+    --project-type generic \
+    --include "**/*.java"
+```
+
 ## Troubleshooting
 
 ### Common Issues

diff --git a/ai_security_analyzer/app.py b/ai_security_analyzer/app.py
@@ -157,7 +157,7 @@ def parse_arguments() -> AppConfig:
         "--agent-prompt-type",
         choices=["sec-design", "threat-modeling", "attack-surface", "threat-scenarios"],
         default="sec-design",
-        help="Prompt to use in agent (default: sec-design): sec-design - Security Design for Project, threat-modeling - Threat Modeling for Project",
+        help="Prompt to use in agent (default: sec-design): sec-design - Security Design, threat-modeling - Threat Modeling, attack-surface - Attack Surface Analysis, threat-scenarios - Threat Scenarios",
     )
 
     # Editor configuration

diff --git a/ai_security_analyzer/graphs.py b/ai_security_analyzer/graphs.py
@@ -84,7 +84,7 @@ def _write_output(self, state: dict[str, Any] | Any) -> None:
 
         output = f"""=========== dry-run ===========
 All documents token count: {token_count}
-List of chunked files to analyse:
+List of chunked files to analyze:
 {splitted_docs_str}
 """
         print(output)

diff --git a/ai_security_analyzer/loaders.py b/ai_security_analyzer/loaders.py
@@ -19,7 +19,7 @@
 }
 
 GENERIC_FILES_GLOB = ["**/*.md", "**/Dockerfile", "**/*.yml", "**/*.sh", "**/*.bash", "**/*.yaml"]
-PYTHON_FILES_GLOB = ["**/*.py", "pyproject.toml"]
+PYTHON_FILES_GLOB = ["**/*.py", "pyproject.toml", "requirements.txt"]
 GO_FILES_GLOB = ["**/*.go", "**/go.mod", "Makefile"]
 
 FILES_GLOB: dict[str, List[str]] = {
@@ -35,8 +35,10 @@ def _is_visible(p: Path) -> bool:
     return not any(part.startswith(".") for part in p.parts)
 
 
-def _is_github(p: Path) -> bool:
-    return p.parts[0] == ".github"
+# Check whether the first component of the path starts with a known CI/CD name (e.g. GitHub Actions, GitLab CI, CircleCI, Jenkins, Drone).
+def _is_ci_cd(p: Path) -> bool:
+    CICD = [".github", ".gitlab", ".circleci", ".jenkins", ".drone", ".gitlab-ci", ".drone.yml"]
+    return any(p.parts[0].startswith(cicd) for cicd in CICD)
 
 
 class RepoDirectoryLoader(DirectoryLoader):
@@ -138,7 +140,7 @@ def _lazy_load_file(self, item: Path, path: Path, pbar: Optional[Any]) -> Iterat
         """
         if item.is_file():
             relative_path = item.relative_to(path)
-            if _is_visible(relative_path) or self.load_hidden or _is_github(relative_path):
+            if _is_visible(relative_path) or self.load_hidden or _is_ci_cd(relative_path):
                 try:
                     logger.debug(f"Processing file: {str(item)}")
                     loader_cls = LOADERS.get(item.suffix, TextLoader)

diff --git a/tests/test_graphs.py b/tests/test_graphs.py
@@ -70,7 +70,7 @@ def test_dry_run_graph_executor_success(capfd):
     # Build expected output
     expected_output = """=========== dry-run ===========
 All documents token count: 1234
-List of chunked files to analyse:
+List of chunked files to analyze:
 file1.py
 file2.py
 """

diff --git a/tests/test_loaders.py b/tests/test_loaders.py
@@ -2,23 +2,32 @@
 
 from ai_security_analyzer.loaders import RepoDirectoryLoader
 
+root_project_dir = Path(__file__).resolve().parent
+test_target_dir = root_project_dir / "testdata"
 
-def _in_path(substring, paths):
+
+def _anywhere_in_path(substring, paths):
     return any(substring in path for path in paths)
 
 
+def _in_path(substring, paths):
+    return any(str(Path(test_target_dir / substring)) == str(Path(path)) for path in paths)
+
+
 def test_loader():
-    root_project_dir = Path(__file__).resolve().parent
-    test_target_dir = root_project_dir / "testdata"
 
     loader = RepoDirectoryLoader(test_target_dir, "python")
     docs = loader.load()
     sourcePaths = [d.metadata["source"] for d in docs]
     sourcePaths = sorted(set(sourcePaths))
 
-    assert _in_path("ci.yaml", sourcePaths)
     assert _in_path("Dockerfile", sourcePaths)
-    assert not _in_path(".pytest_cache", sourcePaths)
-    assert not _in_path(".mypy_cache", sourcePaths)
-    assert not _in_path(".ruff_cache", sourcePaths)
-    assert not _in_path(".venv", sourcePaths)
+    assert _in_path(".drone.yml", sourcePaths)
+    assert _in_path(".gitlab-ci.yml", sourcePaths)
+    assert _in_path(".circleci/config.yml", sourcePaths)
+    assert _in_path(".jenkins/config.yml", sourcePaths)
+    assert _in_path(".github/workflows/ci.yaml", sourcePaths)
+    assert not _anywhere_in_path(".pytest_cache", sourcePaths)
+    assert not _anywhere_in_path(".mypy_cache", sourcePaths)
+    assert not _anywhere_in_path(".ruff_cache", sourcePaths)
+    assert not _anywhere_in_path(".venv", sourcePaths)
diff --git a/tests/testdata/.circleci/config.yml b/tests/testdata/.circleci/config.yml
@@ -0,0 +1,23 @@
+version: 2.1
+
+jobs:
+  build:
+    docker:
+      - image: circleci/node:16
+    steps:
+      - checkout
+      - run:
+          name: Install dependencies
+          command: npm install
+      - run:
+          name: Run tests
+          command: npm test
+      - run:
+          name: Build application
+          command: npm run build
+
+workflows:
+  version: 2
+  build_and_test:
+    jobs:
+      - build
diff --git a/tests/testdata/.drone.yml b/tests/testdata/.drone.yml
@@ -0,0 +1,22 @@
+kind: pipeline
+type: docker
+name: default
+
+steps:
+  - name: install
+    image: node:16
+    commands:
+      - npm install
+
+  - name: test
+    image: node:16
+    commands:
+      - npm test
+
+  - name: deploy
+    image: node:16
+    when:
+      branch:
+        - main
+    commands:
+      - npm run deploy
diff --git a/tests/testdata/.gitlab-ci.yml b/tests/testdata/.gitlab-ci.yml
@@ -0,0 +1,28 @@
+stages:
+  - build
+  - test
+  - deploy
+
+build:
+  stage: build
+  script:
+    - echo "Building project..."
+    - npm install
+    - npm run build
+  artifacts:
+    paths:
+      - dist/
+
+test:
+  stage: test
+  script:
+    - echo "Running tests..."
+    - npm test
+
+deploy:
+  stage: deploy
+  script:
+    - echo "Deploying application..."
+    - npm run deploy
+  only:
+    - main
diff --git a/tests/testdata/.jenkins/config.yml b/tests/testdata/.jenkins/config.yml
@@ -0,0 +1,29 @@
+pipeline {
+    agent any
+
+    stages {
+        stage('Checkout') {
+            steps {
+                checkout scm
+            }
+        }
+        stage('Build') {
+            steps {
+                sh 'npm install'
+            }
+        }
+        stage('Test') {
+            steps {
+                sh 'npm test'
+            }
+        }
+        stage('Deploy') {
+            when {
+                branch 'main'
+            }
+            steps {
+                sh 'npm run deploy'
+            }
+        }
+    }
+}