From c7708613a37f81a3d6327758c0189c48cdb784a6 Mon Sep 17 00:00:00 2001
From: xvnpw <17719543+xvnpw@users.noreply.github.com>
Date: Tue, 10 Dec 2024 18:34:52 +0100
Subject: [PATCH] added filter for hidden cicd files

---
 README.md                           | 30 ++++++++++++++++++++++++++---
 ai_security_analyzer/app.py         |  2 +-
 ai_security_analyzer/graphs.py      |  2 +-
 ai_security_analyzer/loaders.py     | 10 ++++++----
 tests/test_graphs.py                |  2 +-
 tests/test_loaders.py               | 25 ++++++++++++++++--------
 tests/testdata/.circleci/config.yml | 23 ++++++++++++++++++++++
 tests/testdata/.drone.yml           | 22 +++++++++++++++++++++
 tests/testdata/.gitlab-ci.yml       | 28 +++++++++++++++++++++++++++
 tests/testdata/.jenkins/config.yml  | 29 ++++++++++++++++++++++++++++
 10 files changed, 155 insertions(+), 18 deletions(-)
 create mode 100644 tests/testdata/.circleci/config.yml
 create mode 100644 tests/testdata/.drone.yml
 create mode 100644 tests/testdata/.gitlab-ci.yml
 create mode 100644 tests/testdata/.jenkins/config.yml

diff --git a/README.md b/README.md
index 0b441b7..fe63a69 100644
--- a/README.md
+++ b/README.md
@@ -21,12 +21,12 @@
 
 ## Overview
 
-**AI Security Analyzer** is a Python-based tool that analyzes your project's codebase and automatically generates detailed security documentation. It supports multiple analysis types including security design documentation and threat modeling. The tool supports multiple project types and utilizes advanced language models (LLMs) to create insightful security documentation tailored to your project's specific needs.
+**AI Security Analyzer** is a Python-based tool that analyzes your project's codebase and automatically generates detailed security documentation. It supports multiple analysis types including security design documentation, threat modeling, attack surface analysis, and threat scenarios. The tool supports multiple project types and utilizes advanced language models (LLMs) to create insightful security documentation tailored to your project's specific needs.
 
 ## Features
 
 - 🔍 **Intelligent Code Analysis**: Automatically analyzes your project's codebase for security considerations.
-- 📝 **Automated Documentation Generation**: Generates comprehensive security design and threat modeling documents.
+- 📝 **Automated Documentation Generation**: Generates comprehensive security design, threat modeling, attack surface analysis, and threat scenarios documents.
 - 🔐 **Security-Focused Insights**: Provides detailed insights into potential security risks and design patterns.
 - 🔄 **Multi-Project Support**: Supports Python, Go project types.
 - 🤖 **Multiple LLM Provider Support**: Compatible with OpenAI, OpenRouter, and Anthropic models.
@@ -181,6 +181,8 @@ The application accepts various command-line arguments to tailor its behavior.
 - `--agent-prompt-type`: Prompt to use in agent (default: `sec-design`). Options are:
   - `sec-design`: Generate a security design document for the project.
   - `threat-modeling`: Perform threat modeling for the project.
+  - `attack-surface`: Perform attack surface analysis for the project.
+  - `threat-scenarios`: Perform threat scenarios analysis for the project using Daniel Miessler's [prompt](https://github.com/danielmiessler/fabric/blob/f5f50cc4c94a539ee56bc533e9b1194eb9aa424d/patterns/create_threat_scenarios/system.md).
 - `--files-context-window`: Maximum token size for LLM context window. Automatically determined if not set.
 - `--files-chunk-size`: Chunk size in tokens for splitting files. Automatically determined if not set.
 
@@ -261,13 +263,35 @@ poetry run python ai_security_analyzer/app.py \
 ```
 =========== dry-run ===========
 All documents token count: 123456
-List of chunked files to analyse:
+List of chunked files to analyze:
 src/main.py
 src/utils.py
 README.md
 ...
 ```
 
+## Real World Examples
+
+See [examples](https://github.com/xvnpw/ai-security-analyzer/blob/main/examples/README.md) for real-world examples, such as the Flask framework and the Requests library.
+
+## Supported Project Types
+
+- Python
+- Go
+- More to come...
+
+If your project type is not supported, use the `generic` project type together with the `--include`, `--include-mode`, `--exclude`, and `--exclude-mode` options.
+
+**Example:**
+
+```bash
+poetry run python ai_security_analyzer/app.py \
+    -t /path/to/your/project \
+    -o security_design.md \
+    --project-type generic \
+    --include "**/*.java"
+```
+
 ## Troubleshooting
 
 ### Common Issues
diff --git a/ai_security_analyzer/app.py b/ai_security_analyzer/app.py
index a9f1c2c..3937e13 100644
--- a/ai_security_analyzer/app.py
+++ b/ai_security_analyzer/app.py
@@ -157,7 +157,7 @@ def parse_arguments() -> AppConfig:
         "--agent-prompt-type",
         choices=["sec-design", "threat-modeling", "attack-surface", "threat-scenarios"],
         default="sec-design",
-        help="Prompt to use in agent (default: sec-design): sec-design - Security Design for Project, threat-modeling - Threat Modeling for Project",
+        help="Prompt to use in agent (default: sec-design): sec-design - Security Design, threat-modeling - Threat Modeling, attack-surface - Attack Surface Analysis, threat-scenarios - Threat Scenarios",
     )
 
     # Editor configuration
diff --git a/ai_security_analyzer/graphs.py b/ai_security_analyzer/graphs.py
index 0a96539..b447ce9 100644
--- a/ai_security_analyzer/graphs.py
+++ b/ai_security_analyzer/graphs.py
@@ -84,7 +84,7 @@ def _write_output(self, state: dict[str, Any] | Any) -> None:
 
         output = f"""=========== dry-run ===========
 All documents token count: {token_count}
-List of chunked files to analyse:
+List of chunked files to analyze:
 {splitted_docs_str}
 """
         print(output)
diff --git a/ai_security_analyzer/loaders.py b/ai_security_analyzer/loaders.py
index dbdb652..e32abe6 100644
--- a/ai_security_analyzer/loaders.py
+++ b/ai_security_analyzer/loaders.py
@@ -19,7 +19,7 @@
 }
 
 GENERIC_FILES_GLOB = ["**/*.md", "**/Dockerfile", "**/*.yml", "**/*.sh", "**/*.bash", "**/*.yaml"]
-PYTHON_FILES_GLOB = ["**/*.py", "pyproject.toml"]
+PYTHON_FILES_GLOB = ["**/*.py", "pyproject.toml", "requirements.txt"]
 GO_FILES_GLOB = ["**/*.go", "**/go.mod", "Makefile"]
 
 FILES_GLOB: dict[str, List[str]] = {
@@ -35,8 +35,10 @@ def _is_visible(p: Path) -> bool:
     return not any(part.startswith(".") for part in p.parts)
 
 
-def _is_github(p: Path) -> bool:
-    return p.parts[0] == ".github"
+# Return True if the path's first component starts with a known CI/CD name, e.g. .github or .gitlab-ci.yml.
+def _is_ci_cd(p: Path) -> bool:
+    CICD = [".github", ".gitlab", ".circleci", ".jenkins", ".drone", ".gitlab-ci", ".drone.yml"]
+    return any(p.parts[0].startswith(cicd) for cicd in CICD)
 
 
 class RepoDirectoryLoader(DirectoryLoader):
@@ -138,7 +140,7 @@ def _lazy_load_file(self, item: Path, path: Path, pbar: Optional[Any]) -> Iterat
         """
         if item.is_file():
             relative_path = item.relative_to(path)
-            if _is_visible(relative_path) or self.load_hidden or _is_github(relative_path):
+            if _is_visible(relative_path) or self.load_hidden or _is_ci_cd(relative_path):
                 try:
                     logger.debug(f"Processing file: {str(item)}")
                     loader_cls = LOADERS.get(item.suffix, TextLoader)
diff --git a/tests/test_graphs.py b/tests/test_graphs.py
index b888edf..f99ad2a 100644
--- a/tests/test_graphs.py
+++ b/tests/test_graphs.py
@@ -70,7 +70,7 @@ def test_dry_run_graph_executor_success(capfd):
     # Build expected output
     expected_output = """=========== dry-run ===========
 All documents token count: 1234
-List of chunked files to analyse:
+List of chunked files to analyze:
 file1.py
 file2.py
 """
diff --git a/tests/test_loaders.py b/tests/test_loaders.py
index bb7cba3..d5c14f0 100644
--- a/tests/test_loaders.py
+++ b/tests/test_loaders.py
@@ -2,23 +2,32 @@
 
 from ai_security_analyzer.loaders import RepoDirectoryLoader
 
+root_project_dir = Path(__file__).resolve().parent
+test_target_dir = root_project_dir / "testdata"
 
-def _in_path(substring, paths):
+
+def _anywhere_in_path(substring, paths):
     return any(substring in path for path in paths)
 
 
+def _in_path(substring, paths):
+    return any(str(Path(test_target_dir / substring)) == str(Path(path)) for path in paths)
+
+
 def test_loader():
-    root_project_dir = Path(__file__).resolve().parent
-    test_target_dir = root_project_dir / "testdata"
 
     loader = RepoDirectoryLoader(test_target_dir, "python")
     docs = loader.load()
     sourcePaths = [d.metadata["source"] for d in docs]
     sourcePaths = sorted(set(sourcePaths))
 
-    assert _in_path("ci.yaml", sourcePaths)
     assert _in_path("Dockerfile", sourcePaths)
-    assert not _in_path(".pytest_cache", sourcePaths)
-    assert not _in_path(".mypy_cache", sourcePaths)
-    assert not _in_path(".ruff_cache", sourcePaths)
-    assert not _in_path(".venv", sourcePaths)
+    assert _in_path(".drone.yml", sourcePaths)
+    assert _in_path(".gitlab-ci.yml", sourcePaths)
+    assert _in_path(".circleci/config.yml", sourcePaths)
+    assert _in_path(".jenkins/config.yml", sourcePaths)
+    assert _in_path(".github/workflows/ci.yaml", sourcePaths)
+    assert not _anywhere_in_path(".pytest_cache", sourcePaths)
+    assert not _anywhere_in_path(".mypy_cache", sourcePaths)
+    assert not _anywhere_in_path(".ruff_cache", sourcePaths)
+    assert not _anywhere_in_path(".venv", sourcePaths)
diff --git a/tests/testdata/.circleci/config.yml b/tests/testdata/.circleci/config.yml
new file mode 100644
index 0000000..5c76d02
--- /dev/null
+++ b/tests/testdata/.circleci/config.yml
@@ -0,0 +1,23 @@
+version: 2.1
+
+jobs:
+  build:
+    docker:
+      - image: circleci/node:16
+    steps:
+      - checkout
+      - run:
+          name: Install dependencies
+          command: npm install
+      - run:
+          name: Run tests
+          command: npm test
+      - run:
+          name: Build application
+          command: npm run build
+
+workflows:
+  version: 2
+  build_and_test:
+    jobs:
+      - build
diff --git a/tests/testdata/.drone.yml b/tests/testdata/.drone.yml
new file mode 100644
index 0000000..7c10cc0
--- /dev/null
+++ b/tests/testdata/.drone.yml
@@ -0,0 +1,22 @@
+kind: pipeline
+type: docker
+name: default
+
+steps:
+  - name: install
+    image: node:16
+    commands:
+      - npm install
+
+  - name: test
+    image: node:16
+    commands:
+      - npm test
+
+  - name: deploy
+    image: node:16
+    when:
+      branch:
+        - main
+    commands:
+      - npm run deploy
diff --git a/tests/testdata/.gitlab-ci.yml b/tests/testdata/.gitlab-ci.yml
new file mode 100644
index 0000000..7de9907
--- /dev/null
+++ b/tests/testdata/.gitlab-ci.yml
@@ -0,0 +1,28 @@
+stages:
+  - build
+  - test
+  - deploy
+
+build:
+  stage: build
+  script:
+    - echo "Building project..."
+    - npm install
+    - npm run build
+  artifacts:
+    paths:
+      - dist/
+
+test:
+  stage: test
+  script:
+    - echo "Running tests..."
+    - npm test
+
+deploy:
+  stage: deploy
+  script:
+    - echo "Deploying application..."
+    - npm run deploy
+  only:
+    - main
diff --git a/tests/testdata/.jenkins/config.yml b/tests/testdata/.jenkins/config.yml
new file mode 100644
index 0000000..24e3246
--- /dev/null
+++ b/tests/testdata/.jenkins/config.yml
@@ -0,0 +1,29 @@
+pipeline {
+    agent any
+
+    stages {
+        stage('Checkout') {
+            steps {
+                checkout scm
+            }
+        }
+        stage('Build') {
+            steps {
+                sh 'npm install'
+            }
+        }
+        stage('Test') {
+            steps {
+                sh 'npm test'
+            }
+        }
+        stage('Deploy') {
+            when {
+                branch 'main'
+            }
+            steps {
+                sh 'npm run deploy'
+            }
+        }
+    }
+}