Merge pull request #3 from markbattistella:2024-08-16---Cleanup-and-G…

…H-actions 2024-08-16 - WIP GH actions and validation
markbattistella · Aug 16, 2024 · 3cde0ae · 3cde0ae
2 parents 0a5bd3a + fab1af3
commit 3cde0ae
Show file tree

Hide file tree

Showing 11 changed files with 393 additions and 4 deletions.
diff --git a/.github/ISSUE_TEMPLATE/new-word.yml b/.github/ISSUE_TEMPLATE/new-word.yml
@@ -0,0 +1,49 @@
+# Issue form for proposing new words for the PhraseKit library.
+name: New Word Submission
+
+description: Submit new words to be added to the PhraseKit library.
+
+title: '✨ New Word Submission'
+
+# Labels listed for issues created from this form.
+labels: ["new-word", "triage"]
+
+body:
+
+  # Introductory text shown above the inputs.
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for contributing new words to the PhraseKit library!
+
+        Please enter the words you want to add, one per line, and select the appropriate parts of speech (POS) for them. You can assign multiple POS if needed.
+
+  # Required free-text field: one submitted word per line.
+  # NOTE(review): this field has no `id`, unlike the two checkbox groups below.
+  - type: textarea
+    attributes:
+      label: New Words
+      description: Enter the words you wish to add, one per line.
+      placeholder: |
+        word1
+        word2
+        word3
+    validations:
+      required: true
+
+  # Part-of-speech choices; more than one may be ticked.
+  - type: checkboxes
+    id: pos
+    attributes:
+      label: Parts of Speech (POS)
+      description: Select the appropriate parts of speech for the words you're submitting. You can select multiple options if applicable.
+      options:
+        - label: Noun
+        - label: Verb
+        - label: Adjective
+        - label: Adverb
+    # NOTE(review): `required` is set at group level here but per option in the
+    # Code of Conduct group below - confirm the group-level form is honoured.
+    validations:
+      required: true
+
+  # Code of Conduct acknowledgement; its single option is marked required.
+  - type: checkboxes
+    id: terms
+    attributes:
+      label: Code of Conduct
+      description: By submitting these words, you agree to follow our Code of Conduct.
+      options:
+        - label: I agree to follow this project's Code of Conduct
+          required: true
diff --git a/.github/workflows/add-word.py b/.github/workflows/add-word.py
@@ -0,0 +1,87 @@
+import json
+import os
+import sys
+import jsonschema
+from jsonschema import validate
+from pathlib import Path
+import re
+from github import Github
+
+# Define the schema for validation
+# JSON Schema for a word-list document: an object that must contain the
+# "pending", "safe" and "unsafe" keys, each holding an array of strings.
+# Additional keys are not forbidden by this schema.
+schema = {
+    "type": "object",
+    "properties": {
+        "pending": {"type": "array", "items": {"type": "string"}},
+        "safe": {"type": "array", "items": {"type": "string"}},
+        "unsafe": {"type": "array", "items": {"type": "string"}},
+    },
+    "required": ["pending", "safe", "unsafe"]
+}
+
+# Load environment variables
+words = os.environ.get("WORDS", "")
+pos_list = os.environ.get("POS", "").splitlines()
+issue_number = os.environ.get("GITHUB_ISSUE_NUMBER")
+repo_name = os.environ.get("GITHUB_REPOSITORY")
+token = os.environ.get("GITHUB_TOKEN")
+
+# Convert the words and POS list into usable data
+word_list = [word.strip().lower() for word in words.splitlines() if word.strip()]
+pos_list = [pos.strip().lower() for pos in pos_list if pos.strip()]
+
+if not word_list or not pos_list:
+    print("No valid words or POS provided.")
+    sys.exit(1)
+
+# Validate words (must be alpha only)
+invalid_words = [word for word in word_list if not re.match(r'^[a-z]+$', word)]
+if invalid_words:
+    invalid_word_list = ', '.join(invalid_words)
+    message = f"The following words are invalid and cannot be processed: {invalid_word_list}. Only alphabetic words are allowed."
+
+    # Post comment to GitHub issue
+    g = Github(token)
+    repo = g.get_repo(repo_name)
+    issue = repo.get_issue(int(issue_number))
+    issue.create_comment(message)
+
+    print(message)
+    sys.exit(1)
+
+# Paths to the JSON files
+base_path = Path("./Sources/PhraseKit/Resources")
+file_map = {
+    "adjective": base_path / "_adjective.json",
+    "adverb": base_path / "_adverb.json",
+    "noun": base_path / "_noun.json",
+    "verb": base_path / "_verb.json"
+}
+
+# Ensure the base directory exists
+base_path.mkdir(parents=True, exist_ok=True)
+
+# Function to load or create a JSON file
+def load_or_create_json(path):
+    if path.exists():
+        with open(path, "r") as f:
+            data = json.load(f)
+    else:
+        data = {"pending": [], "safe": [], "unsafe": []}
+    return data
+
+# Function to save JSON data
+def save_json(path, data):
+    with open(path, "w") as f:
+        json.dump(data, f, indent=4, ensure_ascii=False)
+
+# Update the appropriate JSON files
+for word in word_list:
+    for pos in pos_list:
+        if pos in file_map:
+            json_path = file_map[pos]
+            json_data = load_or_create_json(json_path)
+            if word not in json_data["pending"] and word not in json_data["safe"] and word not in json_data["unsafe"]:
+                json_data["pending"].append(word)
+            save_json(json_path, json_data)
+
+print("Words successfully added to the pending list in the appropriate JSON files.")
diff --git a/.github/workflows/add-word.yml b/.github/workflows/add-word.yml
@@ -0,0 +1,52 @@
+name: Process New Word Submission
+
+# Runs for every newly opened issue.
+# NOTE(review): there is no filter on the issue's labels, so issues that did
+# not come from the new-word form also start this job.
+on:
+  issues:
+    types: [opened]
+
+# The job pushes a branch, opens a pull request and may comment on the issue.
+permissions:
+  contents: write
+  issues: write
+  pull-requests: write
+
+jobs:
+  process-new-word:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Switch to the monthly branch
+      run: |
+        BRANCH_NAME=$(date +"%Y-%m")
+        # Each step runs in its own shell; export the name for later steps.
+        echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"
+        # Continue from the branch if it was already pushed this month, so the
+        # later push is a fast-forward; otherwise start it from this checkout.
+        if git fetch --depth=1 origin "refs/heads/$BRANCH_NAME:refs/remotes/origin/$BRANCH_NAME"; then
+          git checkout -B "$BRANCH_NAME" "origin/$BRANCH_NAME"
+        else
+          git checkout -b "$BRANCH_NAME"
+        fi
+
+    - name: Extract issue details
+      id: extract
+      env:
+        # Passed through the environment, never interpolated into the script,
+        # so the issue text cannot inject shell commands.
+        ISSUE_BODY: ${{ github.event.issue.body }}
+        ISSUE_NUMBER: ${{ github.event.issue.number }}
+      run: |
+        # Print the non-blank lines under the "### <heading>" that starts
+        # with $1, stopping at the next "### " heading.
+        section() {
+          printf '%s\n' "$ISSUE_BODY" | tr -d '\r' | awk -v heading="$1" '
+            /^### / { inside = (index($0, "### " heading) == 1); next }
+            inside && NF { print }
+          '
+        }
+        # Multi-line values need the NAME<<DELIMITER form in $GITHUB_ENV; the
+        # delimiter is random so the issue text cannot close the block early.
+        delimiter="$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
+        {
+          echo "WORDS<<$delimiter"
+          section "New Words"
+          echo "$delimiter"
+          echo "POS<<$delimiter"
+          # Keep only ticked checkboxes and drop the "- [x] " marker.
+          section "Parts of Speech" | sed -n 's/^- \[[xX]\] *//p'
+          echo "$delimiter"
+          echo "GITHUB_ISSUE_NUMBER=$ISSUE_NUMBER"
+        } >> "$GITHUB_ENV"
+
+    - name: Setup Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.x'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install jsonschema PyGithub
+
+    - name: Process and update JSON
+      env:
+        # The script reads the token from the environment to comment on the issue.
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        # WORDS, POS and GITHUB_ISSUE_NUMBER come from the environment set above.
+        python3 .github/workflows/add-word.py
+
+    - name: Commit changes
+      if: success()
+      run: |
+        git config user.name "github-actions[bot]"
+        git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+        git add Sources/PhraseKit/Resources/*.json
+        # Nothing staged means every word was already known; skip the commit.
+        if git diff --cached --quiet; then
+          echo "No changes to commit."
+          exit 0
+        fi
+        git commit -m "Add new words to pending lists in JSON files from issue #${{ github.event.issue.number }}"
+        git push origin "$BRANCH_NAME"
+        echo "CHANGES_PUSHED=true" >> "$GITHUB_ENV"
+
+    - name: Create Pull Request
+      if: success() && env.CHANGES_PUSHED == 'true'
+      env:
+        # The gh CLI authenticates through GH_TOKEN.
+        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        # The monthly branch collects several submissions; open the PR only once.
+        if [ "$(gh pr list --head "$BRANCH_NAME" --base main --state open --json number --jq 'length')" != "0" ]; then
+          echo "A pull request for $BRANCH_NAME is already open."
+          exit 0
+        fi
+        gh pr create --title "Monthly Merge: $BRANCH_NAME" \
+                     --body "This PR merges the changes for $BRANCH_NAME." \
+                     --base main \
+                     --head "$BRANCH_NAME" \
+                     --label "monthly-merge"
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,5 @@ xcuserdata/
 Packages/
 Package.pins
 Package.resolved
+.env
+*.secrets
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,11 @@
+{
+  "spellright.language": [
+    "en_AU"
+  ],
+  "spellright.documentTypes": [
+    "markdown",
+    "latex",
+    "plaintext",
+    "jsonc"
+  ]
+}
diff --git a/Filter/prohibited.json b/Filter/prohibited.json
diff --git a/Filter/scanner.js b/Filter/scanner.js
@@ -0,0 +1,186 @@
+/**
+ * @file scanner.js
+ * @description This script processes JSON files containing word lists and categorizes words into different categories based on specific rules. The categories include "pending," "safe," "unsafe," and "incompatible." The script also provides detailed logs for each file processed, including counts of words moved or skipped.
+ * @version 1.0.0
+ * @license MIT
+ */
+
+/**
+ * @constant {Set<string>} blacklist
+ * @description A set of words considered "unsafe," loaded from the prohibited.json file. Words found in this set are categorized as "unsafe."
+ */
+const blacklist = new Set(require('./prohibited.json')); // Assuming you run from the Filter directory
+
+/**
+ * @function processJsonFile
+ * @description Processes a single JSON file by categorizing words into "safe," "unsafe," or "incompatible" based on specific criteria. Logs detailed information about the processing.
+ * @param {string} filePath - The path to the JSON file to be processed.
+ * @param {Array<string>} rescanOptions - The categories to rescan or process. Can include "pending," "safe," "unsafe," and "all."
+ * @param {number} index - The index of the current file being processed, used for logging.
+ * @param {number} totalFiles - The total number of files to be processed, used for logging.
+ */
+const processJsonFile = (filePath, rescanOptions, index, totalFiles) => {
+  const fileName = path.basename(filePath);
+  console.log(`[${index + 1} / ${totalFiles}] WORKING ON FILE`);
+  console.log(`  - File: "${fileName}"`);
+
+  const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));
+
+  // Extract categories from the JSON data
+  const { pending = [], safe = [], unsafe = [], incompatible = [] } = data;
+
+  console.log(`  - Initial counts:`);
+  console.log(`      | Pending      | ${String(pending.length).padStart(6)}`);
+  console.log(`      | Safe         | ${String(safe.length).padStart(6)}`);
+  console.log(`      | Unsafe       | ${String(unsafe.length).padStart(6)}`);
+  console.log(`      | Incompatible | ${String(incompatible.length).padStart(6)}`);
+
+  // Arrays to store the new categorization
+  let newSafe = [...safe];  // Keep existing safe words
+  let newUnsafe = [...unsafe];  // Keep existing unsafe words
+  let newIncompatible = [...incompatible];  // Keep existing incompatible words
+
+  // Counters for logging
+  let movedToSafe = 0;
+  let movedToUnsafe = 0;
+  let movedToIncompatible = 0;
+  let skippedSafe = 0;
+  let skippedUnsafe = 0;
+
+  /**
+   * @function processWords
+   * @description Processes a list of words and categorizes them into "safe," "unsafe," or "incompatible" based on whether they contain spaces or are found in the blacklist.
+   * @param {Array<string>} words - The list of words to be processed.
+   * @param {string} category - The category of words being processed (e.g., "pending," "safe," "unsafe").
+   */
+  const processWords = (words, category) => {
+    console.log(`  - Processing "${category}" category with ${words.length} words`);
+    words.forEach(word => {
+      if (word.includes(' ')) {
+        newIncompatible.push(word);
+        if (category === 'pending') movedToIncompatible++;
+      } else if (blacklist.has(word)) {
+        newUnsafe.push(word);
+        if (category === 'pending') movedToUnsafe++;
+        else skippedUnsafe++;
+      } else {
+        newSafe.push(word);
+        if (category === 'pending') movedToSafe++;
+        else skippedSafe++;
+      }
+    });
+  };
+
+  // Determine which categories to process
+  if (rescanOptions.includes('all')) {
+    newSafe.length = 0; // Clear safe before reprocessing
+    newUnsafe.length = 0; // Clear unsafe before reprocessing
+    newIncompatible.length = 0; // Clear incompatible before reprocessing
+    processWords(pending, 'pending');
+    processWords(safe, 'safe');
+    processWords(unsafe, 'unsafe');
+  } else {
+    if (rescanOptions.length === 0 || rescanOptions.includes('pending')) {
+      processWords(pending, 'pending');
+    }
+    if (rescanOptions.includes('safe')) {
+      newSafe.length = 0; // Clear safe before reprocessing
+      processWords(safe, 'safe');
+    }
+    if (rescanOptions.includes('unsafe')) {
+      newUnsafe.length = 0; // Clear unsafe before reprocessing
+      processWords(unsafe, 'unsafe');
+    }
+  }
+
+  // Remove duplicates and sort the lists
+  newSafe = Array.from(new Set(newSafe)).sort();
+  newUnsafe = Array.from(new Set(newUnsafe)).sort();
+  newIncompatible = Array.from(new Set(newIncompatible)).sort();
+
+  const output = {
+    pending: [], // After processing, pending is empty
+    safe: newSafe,
+    unsafe: newUnsafe,
+    incompatible: newIncompatible
+  };
+
+  const tempFilePath = filePath + '.tmp';
+
+  // Write to a temporary file with minified JSON
+  fs.writeFileSync(tempFilePath, JSON.stringify(output, null, 0));
+
+  // Rename the temporary file to overwrite the original file
+  fs.renameSync(tempFilePath, filePath);
+
+  console.log(`  - Moved:`);
+  console.log(`      | ${String(movedToSafe).padStart(5)} words: Pending --> Safe`);
+  console.log(`      | ${String(movedToUnsafe).padStart(5)} words: Pending --> Unsafe`);
+  console.log(`      | ${String(movedToIncompatible).padStart(5)} words: Pending --> Incompatible`);
+  console.log(`  - Skipped:`);
+  console.log(`      | ${String(skippedSafe).padStart(5)} words already in Safe`);
+  console.log(`      | ${String(skippedUnsafe).padStart(5)} words already in Unsafe`);
+  console.log(`  - Processed and updated\n`);
+};
+
+/**
+ * @function getAllJsonFiles
+ * @description Recursively retrieves all JSON files from a specified directory.
+ * @param {string} baseDir - The base directory to search for JSON files.
+ * @returns {Array<string>} - An array of file paths to the JSON files found.
+ */
+const getAllJsonFiles = (baseDir) => {
+  let results = [];
+
+  function traverseDir(currentDir) {
+    const list = fs.readdirSync(currentDir);
+
+    list.forEach(file => {
+      const filePath = path.join(currentDir, file);
+      const stat = fs.statSync(filePath);
+
+      if (stat && stat.isDirectory()) {
+        traverseDir(filePath); // Recurse into directories
+      } else if (file.endsWith('.json')) {
+        results.push(filePath); // Only add .json files
+      }
+    });
+  }
+
+  traverseDir(baseDir);
+  return results;
+};
+
+/**
+ * @function main
+ * @description The main function that processes all JSON files in the specified directory based on the provided rescan options.
+ * @param {string} baseDir - The base directory containing the JSON files to process.
+ * @param {Array<string>} rescanOption - The categories to rescan or process. Can include "pending," "safe," "unsafe," and "all."
+ */
+const main = (baseDir, rescanOption) => {
+  const files = getAllJsonFiles(baseDir);
+
+  if (files.length === 0) {
+    console.error('No files found matching the pattern.');
+    return;
+  }
+
+  console.log(`\n[i] FOUND ${files.length} JSON FILES\n`);
+
+  files.forEach((filePath, index) => processJsonFile(filePath, rescanOption, index, files.length));
+
+  console.log('[i] PROCESS COMPLETE');
+};
+
+// Get the base directory and rescan option from the command line arguments and run the script
+const [baseDir, rescanOptionArg] = process.argv.slice(2);
+
+if (!baseDir) {
+  console.error('Usage: node scanner.js /path/to/files <rescan_option>');
+  console.error('Rescan options: pending (default), safe, unsafe, all');
+  process.exit(1);
+}
+
+const rescanOption = rescanOptionArg ? rescanOptionArg.split(',') : [];
+
+main(path.resolve(baseDir), rescanOption);
diff --git a/Sources/PhraseKit/Resources/_adjective.json b/Sources/PhraseKit/Resources/_adjective.json
diff --git a/Sources/PhraseKit/Resources/_adverb.json b/Sources/PhraseKit/Resources/_adverb.json
diff --git a/Sources/PhraseKit/Resources/_noun.json b/Sources/PhraseKit/Resources/_noun.json
diff --git a/Sources/PhraseKit/Resources/_verb.json b/Sources/PhraseKit/Resources/_verb.json