Skip to content

Commit

Permalink
WIP: 2024-08-16
Browse files Browse the repository at this point in the history
  • Loading branch information
markbattistella committed Aug 16, 2024
1 parent 0a5bd3a commit fab1af3
Show file tree
Hide file tree
Showing 11 changed files with 393 additions and 4 deletions.
49 changes: 49 additions & 0 deletions .github/ISSUE_TEMPLATE/new-word.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# GitHub issue form used to submit new words for the PhraseKit word lists.
# The add-word workflow reads the submitted issue body, so the "New Words" and
# "Parts of Speech (POS)" labels below are matched as text there; rename them
# in both places or the extraction will find nothing.
name: New Word Submission

description: Submit new words to be added to the PhraseKit library.

title: '✨ New Word Submission'

labels: ["new-word", "triage"]

body:

- type: markdown
attributes:
value: |
Thanks for contributing new words to the PhraseKit library!
Please enter the words you want to add, one per line, and select the appropriate parts of speech (POS) for them. You can assign multiple POS if needed.
- type: textarea
# One word per line; add-word.py lower-cases each line and rejects anything that is not a-z only.
attributes:
label: New Words
description: Enter the words you wish to add, one per line.
placeholder: |
word1
word2
word3
validations:
required: true

# Every selected part of speech is applied to every submitted word.
# The option labels, lower-cased, must match the keys of file_map in add-word.py.
- type: checkboxes
id: pos
attributes:
label: Parts of Speech (POS)
description: Select the appropriate parts of speech for the words you're submitting. You can select multiple options if applicable.
options:
- label: Noun
- label: Verb
- label: Adjective
- label: Adverb
validations:
required: true

# Code of Conduct acknowledgement; the single option is itself marked required.
- type: checkboxes
id: terms
attributes:
label: Code of Conduct
description: By submitting these words, you agree to follow our Code of Conduct.
options:
- label: I agree to follow this project's Code of Conduct
required: true
87 changes: 87 additions & 0 deletions .github/workflows/add-word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import json
import os
import sys
import jsonschema
from jsonschema import validate
from pathlib import Path
import re
from github import Github

# Define the schema for validation
# Describes the expected shape of a word-list JSON file: an object with three
# required keys ("pending", "safe", "unsafe"), each an array of strings.
# NOTE(review): nothing in this script passes `schema` to `validate`, so the
# shape is not currently enforced when files are loaded.
schema = {
"type": "object",
"properties": {
"pending": {"type": "array", "items": {"type": "string"}},
"safe": {"type": "array", "items": {"type": "string"}},
"unsafe": {"type": "array", "items": {"type": "string"}},
},
"required": ["pending", "safe", "unsafe"]
}

# Load environment variables
# WORDS and POS hold one entry per line; the GITHUB_* values are only needed
# to comment on the originating issue when validation fails.
words = os.environ.get("WORDS", "")
pos_list = os.environ.get("POS", "").splitlines()
issue_number = os.environ.get("GITHUB_ISSUE_NUMBER")
repo_name = os.environ.get("GITHUB_REPOSITORY")
token = os.environ.get("GITHUB_TOKEN")

# Convert the words and POS list into usable data.
# Entries are trimmed and lower-cased; dict.fromkeys drops duplicates while
# keeping the order in which they were submitted.
word_list = list(dict.fromkeys(word.strip().lower() for word in words.splitlines() if word.strip()))
pos_list = list(dict.fromkeys(pos.strip().lower() for pos in pos_list if pos.strip()))

if not word_list or not pos_list:
    print("No valid words or POS provided.")
    sys.exit(1)

# Validate words (must be alpha only)
invalid_words = [word for word in word_list if not re.fullmatch(r"[a-z]+", word)]
if invalid_words:
    invalid_word_list = ', '.join(invalid_words)
    message = f"The following words are invalid and cannot be processed: {invalid_word_list}. Only alphabetic words are allowed."

    # Report locally first so the reason is visible even if commenting fails.
    print(message)

    # Post comment to GitHub issue. This is best effort: without the token,
    # repository or issue number (e.g. a local run) there is nowhere to post,
    # and an API failure must not hide the validation error above.
    if token and repo_name and issue_number:
        try:
            g = Github(token)
            repo = g.get_repo(repo_name)
            issue = repo.get_issue(int(issue_number))
            issue.create_comment(message)
        except Exception as error:
            print(f"Could not comment on issue #{issue_number}: {error}", file=sys.stderr)
    else:
        print("GitHub issue details are not set; skipping the issue comment.", file=sys.stderr)

    sys.exit(1)

# Directory holding the per-part-of-speech word lists (relative to the working directory)
base_path = Path("./Sources/PhraseKit/Resources")

# Lower-cased part-of-speech name -> JSON file that stores its words
file_map = dict(
    adjective=base_path.joinpath("_adjective.json"),
    adverb=base_path.joinpath("_adverb.json"),
    noun=base_path.joinpath("_noun.json"),
    verb=base_path.joinpath("_verb.json"),
)

# Create the resources directory, including missing parents, when it is absent
base_path.mkdir(exist_ok=True, parents=True)

# Function to load or create a JSON file
def load_or_create_json(path):
    """Return the word lists stored at ``path``, or empty lists if it is missing.

    Args:
        path: ``pathlib.Path`` of the JSON file to read.

    Returns:
        dict: The parsed JSON object. The "pending", "safe" and "unsafe" keys
        are always present (added as empty lists when the file lacks them);
        any other keys found in the file are kept as they are.

    Raises:
        ValueError: If the file exists but its top-level value is not an object.
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if path.exists():
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise ValueError(f"{path} must contain a JSON object of word lists.")
    else:
        data = {}

    # Callers index these keys directly, so guarantee they exist even when an
    # existing file was written without one of them.
    for category in ("pending", "safe", "unsafe"):
        data.setdefault(category, [])
    return data

# Function to save JSON data
def save_json(path, data):
    """Write ``data`` to ``path`` as JSON indented by four spaces.

    Args:
        path: Destination file; it is created or overwritten.
        data: JSON-serialisable object to store.
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the encoding
    # is pinned to UTF-8 instead of depending on the platform default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)

# Update the appropriate JSON files
# Parts of speech without a matching file cannot be stored; report them rather
# than dropping them silently.
unknown_pos = [pos for pos in pos_list if pos not in file_map]
if unknown_pos:
    print(f"Ignoring unrecognised parts of speech: {', '.join(unknown_pos)}", file=sys.stderr)

known_pos = [pos for pos in pos_list if pos in file_map]
if not known_pos:
    print("None of the selected parts of speech are supported.")
    sys.exit(1)

# Each file is loaded once and written at most once, instead of once per word.
for pos in known_pos:
    json_path = file_map[pos]
    json_data = load_or_create_json(json_path)
    pending_words = json_data.setdefault("pending", [])

    # A word already present in any list of this file is not added again.
    known_words = set(pending_words) | set(json_data.get("safe", [])) | set(json_data.get("unsafe", []))
    new_words = [word for word in dict.fromkeys(word_list) if word not in known_words]

    # Leave the file untouched when there is nothing to add.
    if new_words:
        pending_words.extend(new_words)
        save_json(json_path, json_data)

print("Words successfully added to the pending list in the appropriate JSON files.")
52 changes: 52 additions & 0 deletions .github/workflows/add-word.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: Process New Word Submission

on:
  issues:
    types: [opened]

# The job pushes a branch, opens a pull request and may comment on the issue.
permissions:
  contents: write
  issues: write
  pull-requests: write

jobs:
  process-new-word:
    # Only handle issues created from the "New Word Submission" form, which
    # applies the "new-word" label.
    if: contains(github.event.issue.labels.*.name, 'new-word')
    runs-on: ubuntu-latest

    env:
      # Read by add-word.py (issue comments) and by the gh CLI.
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      GITHUB_ISSUE_NUMBER: ${{ github.event.issue.number }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Extract issue details
        id: extract
        env:
          # The issue body is untrusted. Hand it over through the environment
          # instead of interpolating it into the script text, which would let
          # a submitter run arbitrary shell commands.
          ISSUE_BODY: ${{ github.event.issue.body }}
        run: |
          # Print the non-empty lines that follow the "### <heading>" line of
          # the issue form body, up to the next "### " heading.
          section() {
            printf '%s\n' "$ISSUE_BODY" | tr -d '\r' | awk -v heading="### $1" '
              $0 == heading { inside = 1; next }
              /^### /       { inside = 0 }
              inside && NF
            '
          }

          # Multi-line values must use the delimiter syntax. A random delimiter
          # prevents the issue body from ending the value early.
          delimiter="EOF_$(openssl rand -hex 16)"
          {
            echo "WORDS<<$delimiter"
            section "New Words"
            echo "$delimiter"
            echo "POS<<$delimiter"
            # Keep only the ticked checkboxes and strip the "- [x] " prefix.
            section "Parts of Speech (POS)" | sed -n 's/^- \[[xX]\] //p'
            echo "$delimiter"
          } >> "$GITHUB_ENV"

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install jsonschema PyGithub

      - name: Switch to the monthly branch
        run: |
          BRANCH_NAME="$(date +%Y-%m)"
          # Later steps run in a new shell, so share the name through GITHUB_ENV.
          echo "BRANCH_NAME=$BRANCH_NAME" >> "$GITHUB_ENV"
          # Continue the existing branch for this month when there is one, so
          # the later push is a fast-forward; otherwise start it from the
          # checked-out commit.
          if git fetch --depth=1 origin "$BRANCH_NAME"; then
            git checkout -B "$BRANCH_NAME" FETCH_HEAD
          else
            git checkout -b "$BRANCH_NAME"
          fi

      - name: Process and update JSON
        # The script takes WORDS and POS from the environment set above.
        run: python3 .github/workflows/add-word.py

      - name: Commit changes
        id: commit
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add Sources/PhraseKit/Resources/*.json
          # Every submitted word may already be known; that is not a failure.
          if git diff --cached --quiet; then
            echo "No new words to commit."
            echo "changed=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          git commit -m "Add new words to pending lists in JSON files from issue #${GITHUB_ISSUE_NUMBER}"
          git push origin "$BRANCH_NAME"
          echo "changed=true" >> "$GITHUB_OUTPUT"

      - name: Create Pull Request
        if: steps.commit.outputs.changed == 'true'
        # NOTE: the repository must allow GitHub Actions to create pull
        # requests, and the "monthly-merge" label must already exist.
        run: |
          # One pull request collects the whole month; do not open a second.
          if [ -n "$(gh pr list --head "$BRANCH_NAME" --state open --json number --jq '.[].number')" ]; then
            echo "A pull request for $BRANCH_NAME is already open."
            exit 0
          fi
          gh pr create --title "Monthly Merge: $BRANCH_NAME" \
            --body "This PR merges the changes for $BRANCH_NAME." \
            --base main \
            --head "$BRANCH_NAME" \
            --label "monthly-merge"
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ xcuserdata/
Packages/
Package.pins
Package.resolved
.env
*.secrets
11 changes: 11 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"spellright.language": [
"en_AU"
],
"spellright.documentTypes": [
"markdown",
"latex",
"plaintext",
"jsonc"
]
}
2 changes: 2 additions & 0 deletions Filter/prohibited.json

Large diffs are not rendered by default.

186 changes: 186 additions & 0 deletions Filter/scanner.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/**
* @file scanner.js
* @description This script processes JSON files containing word lists and categorizes words into different categories based on specific rules. The categories include "pending," "safe," "unsafe," and "incompatible." The script also provides detailed logs for each file processed, including counts of words moved or skipped.
* @version 1.0.0
* @license MIT
*/

const fs = require('fs');
const path = require('path');

/**
 * @constant {Set<string>} blacklist
 * @description A set of words considered "unsafe," loaded from the prohibited.json file. Words found in this set are categorized as "unsafe."
 */
const blacklist = new Set(require('./prohibited.json')); // Assuming you run from the Filter directory

/**
 * @function processJsonFile
 * @description Processes a single JSON file by categorizing words into "safe," "unsafe," or "incompatible" based on specific criteria, then rewrites the file in place. Words containing a space become "incompatible", words found in the blacklist become "unsafe", and everything else becomes "safe". Only the categories selected by `rescanOptions` are re-evaluated; categories that are not selected, including "pending" and "incompatible", keep their existing words. Files whose top-level JSON value is not an object are left untouched. Logs detailed information about the processing.
 * @param {string} filePath - The path to the JSON file to be processed.
 * @param {Array<string>} rescanOptions - The categories to rescan or process. Can include "pending," "safe," "unsafe," and "all." An empty array processes "pending" only.
 * @param {number} index - The index of the current file being processed, used for logging.
 * @param {number} totalFiles - The total number of files to be processed, used for logging.
 */
const processJsonFile = (filePath, rescanOptions, index, totalFiles) => {
    const fileName = path.basename(filePath);
    console.log(`[${index + 1} / ${totalFiles}] WORKING ON FILE`);
    console.log(` - File: "${fileName}"`);

    const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));

    // A JSON file that is not an object (for example a plain array of words)
    // is not a categorized word list; rewriting it would erase its content.
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
        console.log(` - Skipped: file does not contain a word-list object\n`);
        return;
    }

    // Extract categories from the JSON data
    const { pending = [], safe = [], unsafe = [], incompatible = [] } = data;

    console.log(` - Initial counts:`);
    console.log(` | Pending | ${String(pending.length).padStart(6)}`);
    console.log(` | Safe | ${String(safe.length).padStart(6)}`);
    console.log(` | Unsafe | ${String(unsafe.length).padStart(6)}`);
    console.log(` | Incompatible | ${String(incompatible.length).padStart(6)}`);

    // Work out which source categories are re-evaluated on this run
    const rescanAll = rescanOptions.includes('all');
    const scanPending = rescanAll || rescanOptions.length === 0 || rescanOptions.includes('pending');
    const scanSafe = rescanAll || rescanOptions.includes('safe');
    const scanUnsafe = rescanAll || rescanOptions.includes('unsafe');

    // A rescanned category starts empty and is refilled by processWords; any
    // other category keeps its current words. "incompatible" is never used as
    // a source, so its existing words are always carried over.
    const newSafe = scanSafe ? [] : [...safe];
    const newUnsafe = scanUnsafe ? [] : [...unsafe];
    const newIncompatible = [...incompatible];

    // Counters for logging
    let movedToSafe = 0;
    let movedToUnsafe = 0;
    let movedToIncompatible = 0;
    let skippedSafe = 0;
    let skippedUnsafe = 0;

    /**
     * @function processWords
     * @description Processes a list of words and categorizes them into "safe," "unsafe," or "incompatible" based on whether they contain spaces or are found in the blacklist.
     * @param {Array<string>} words - The list of words to be processed.
     * @param {string} category - The category of words being processed (e.g., "pending," "safe," "unsafe").
     */
    const processWords = (words, category) => {
        console.log(` - Processing "${category}" category with ${words.length} words`);
        words.forEach((word) => {
            if (word.includes(' ')) {
                newIncompatible.push(word);
                if (category === 'pending') movedToIncompatible++;
            } else if (blacklist.has(word)) {
                newUnsafe.push(word);
                if (category === 'pending') movedToUnsafe++;
                else skippedUnsafe++;
            } else {
                newSafe.push(word);
                if (category === 'pending') movedToSafe++;
                else skippedSafe++;
            }
        });
    };

    if (scanPending) processWords(pending, 'pending');
    if (scanSafe) processWords(safe, 'safe');
    if (scanUnsafe) processWords(unsafe, 'unsafe');

    // Remove duplicates and sort the lists. Pending is only emptied when its
    // words were actually categorized on this run.
    const output = {
        pending: scanPending ? [] : [...pending],
        safe: [...new Set(newSafe)].sort(),
        unsafe: [...new Set(newUnsafe)].sort(),
        incompatible: [...new Set(newIncompatible)].sort(),
    };

    const tempFilePath = filePath + '.tmp';

    // Write minified JSON to a temporary file, then rename it over the
    // original so a failed write cannot leave a half-written word list.
    fs.writeFileSync(tempFilePath, JSON.stringify(output, null, 0));
    fs.renameSync(tempFilePath, filePath);

    console.log(` - Moved:`);
    console.log(` | ${String(movedToSafe).padStart(5)} words: Pending --> Safe`);
    console.log(` | ${String(movedToUnsafe).padStart(5)} words: Pending --> Unsafe`);
    console.log(` | ${String(movedToIncompatible).padStart(5)} words: Pending --> Incompatible`);
    console.log(` - Skipped:`);
    console.log(` | ${String(skippedSafe).padStart(5)} words already in Safe`);
    console.log(` | ${String(skippedUnsafe).padStart(5)} words already in Unsafe`);
    console.log(` - Processed and updated\n`);
};

/**
 * @function getAllJsonFiles
 * @description Walks a directory tree depth-first and collects the path of every entry whose name ends in ".json".
 * @param {string} baseDir - The directory at which the walk starts.
 * @returns {Array<string>} - The paths of the JSON files found, in the order they were encountered.
 */
const getAllJsonFiles = (baseDir) => {
    // Returns the JSON files under one directory, descending into subdirectories in place
    const collectFrom = (directory) =>
        fs.readdirSync(directory).flatMap((entryName) => {
            const entryPath = path.join(directory, entryName);

            if (fs.statSync(entryPath).isDirectory()) {
                return collectFrom(entryPath);
            }

            // Anything that is not a .json file contributes nothing
            return entryName.endsWith('.json') ? [entryPath] : [];
        });

    return collectFrom(baseDir);
};

/**
 * @function main
 * @description Finds every JSON file below the given directory and runs processJsonFile on each one in turn, passing along the rescan options and the file's position for logging. Reports an error and does nothing else when no JSON file is found.
 * @param {string} baseDir - The directory whose JSON files are processed.
 * @param {Array<string>} rescanOption - The categories to rescan or process. Can include "pending," "safe," "unsafe," and "all."
 */
const main = (baseDir, rescanOption) => {
    const jsonFiles = getAllJsonFiles(baseDir);
    const fileCount = jsonFiles.length;

    if (fileCount === 0) {
        console.error('No files found matching the pattern.');
        return;
    }

    console.log(`\n[i] FOUND ${fileCount} JSON FILES\n`);

    for (const [position, jsonFile] of jsonFiles.entries()) {
        processJsonFile(jsonFile, rescanOption, position, fileCount);
    }

    console.log('[i] PROCESS COMPLETE');
};

// Get the base directory and rescan option from the command line arguments and run the script
const VALID_RESCAN_OPTIONS = ['pending', 'safe', 'unsafe', 'all'];

// Prints the command-line help to stderr
const printUsage = () => {
    console.error('Usage: node scanner.js /path/to/files <rescan_option>');
    console.error('Rescan options: pending (default), safe, unsafe, all');
};

const [baseDir, rescanOptionArg] = process.argv.slice(2);

if (!baseDir) {
    printUsage();
    process.exit(1);
}

// The option is a comma-separated list; tolerate stray spaces, letter case and empty items
const rescanOption = rescanOptionArg
    ? rescanOptionArg
        .split(',')
        .map((option) => option.trim().toLowerCase())
        .filter((option) => option !== '')
    : [];

// Reject mistyped options up front: an unrecognised name selects no category,
// so the run would rewrite every file without categorizing anything.
const unknownOptions = rescanOption.filter((option) => !VALID_RESCAN_OPTIONS.includes(option));
if (unknownOptions.length > 0) {
    console.error(`Unknown rescan option(s): ${unknownOptions.join(', ')}`);
    printUsage();
    process.exit(1);
}

main(path.resolve(baseDir), rescanOption);
2 changes: 1 addition & 1 deletion Sources/PhraseKit/Resources/_adjective.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Sources/PhraseKit/Resources/_adverb.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Sources/PhraseKit/Resources/_noun.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Sources/PhraseKit/Resources/_verb.json

Large diffs are not rendered by default.

0 comments on commit fab1af3

Please sign in to comment.