Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate language modes to CSV #1528

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
04cf66f
Start on three column CSV handling code
FireChickenProductivity Jul 30, 2024
7bad13a
Draft parsing the file
FireChickenProductivity Jul 30, 2024
2817402
Add parsing intermediate value parsing
FireChickenProductivity Jul 30, 2024
ea9e81f
Draft unification function
FireChickenProductivity Jul 31, 2024
be4aa56
Improve naming
FireChickenProductivity Jul 31, 2024
7fa6e0b
Finish drafting file handling
FireChickenProductivity Jul 31, 2024
d005db0
Fix syntax
FireChickenProductivity Jul 31, 2024
e4fd3f0
Make some minor fixes
FireChickenProductivity Jul 31, 2024
f430655
Stop using deprecated function
FireChickenProductivity Jul 31, 2024
70c2eab
Handle different tuple sizes
FireChickenProductivity Jul 31, 2024
3f1fd5e
Remove accidental character
FireChickenProductivity Jul 31, 2024
b7e8b9e
Make name less confusing
FireChickenProductivity Jul 31, 2024
9ebff4c
Watch file
FireChickenProductivity Jul 31, 2024
f37298d
Draft refactoring of language modes
FireChickenProductivity Aug 1, 2024
76a9df6
Correctly use name, fix single item tuples
FireChickenProductivity Aug 2, 2024
a8d0a96
Make sure file exists before loading as a resource
FireChickenProductivity Aug 2, 2024
e2d4c91
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 3, 2024
c98009a
Add missing defaults
FireChickenProductivity Aug 5, 2024
88f816c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 5, 2024
d81ebd5
Properly handle new lines
FireChickenProductivity Aug 5, 2024
79a9cac
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 5, 2024
10bd11b
Strip text for values
FireChickenProductivity Aug 5, 2024
000fb40
Theoretically address issues brought up in community meeting
FireChickenProductivity Aug 10, 2024
e46d3eb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 10, 2024
fc201b8
Document more of the change
FireChickenProductivity Aug 10, 2024
4275392
Declare globals first
FireChickenProductivity Aug 10, 2024
94c16a7
Update edit_text_file.py
nriley Aug 10, 2024
91d9a3e
Merge branch 'main' into migrate-language-modes
nriley Aug 10, 2024
cb26c99
Add callback
FireChickenProductivity Aug 11, 2024
3d55c2c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 163 additions & 75 deletions core/modes/language_modes.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,11 @@
from talon import Context, Module, actions

# Maps language mode names to the extensions that activate them. Only put things
# here which have a supported language mode; that's why there are so many
# commented out entries. TODO: make this a csv file?
language_extensions = {
# 'assembly': 'asm s',
# 'bash': 'bashbook sh',
"batch": "bat",
"c": "c h",
# 'cmake': 'cmake',
# "cplusplus": "cpp hpp",
"csharp": "cs",
"css": "css",
# 'elisp': 'el',
# 'elm': 'elm',
"gdb": "gdb",
"go": "go",
"java": "java",
"javascript": "js",
"javascriptreact": "jsx",
# "json": "json",
"kotlin": "kt",
"lua": "lua",
"markdown": "md",
# 'perl': 'pl',
"php": "php",
# 'powershell': 'ps1',
"python": "py",
"protobuf": "proto",
"r": "r",
# 'racket': 'rkt',
"ruby": "rb",
"rust": "rs",
"scala": "scala",
"scss": "scss",
# 'snippets': 'snippets',
"sql": "sql",
"stata": "do ado",
"talon": "talon",
"talonlist": "talon-list",
"terraform": "tf",
"tex": "tex",
"typescript": "ts",
"typescriptreact": "tsx",
# 'vba': 'vba',
"vimscript": "vim vimrc",
# html doesn't actually have a language mode, but we do have snippets.
"html": "html",
}

# Override speakable forms for language modes. If not present, a language mode's
# name is used directly.
language_name_overrides = {
"cplusplus": ["see plus plus"],
"csharp": ["see sharp"],
"css": ["c s s"],
"gdb": ["g d b"],
"go": ["go", "go lang", "go language"],
"r": ["are language"],
"tex": ["tech", "lay tech", "latex"],
}
from talon import Context, Module, actions, resource

from ..user_settings import (
compute_csv_path,
compute_spoken_form_to_key_dictionary,
create_three_columns_csv_from_default_if_nonexistent,
get_key_value_pairs_and_spoken_forms_from_three_column_csv,
)
nriley marked this conversation as resolved.
Show resolved Hide resolved

mod = Module()

Expand All @@ -74,20 +20,162 @@
mod.tag("code_language_forced", "This tag is active when a language mode is forced")
mod.list("language_mode", desc="Name of a programming language mode.")

ctx.lists["self.language_mode"] = {
name: language
for language in language_extensions
for name in language_name_overrides.get(language, [language])
}

# Maps extension to languages.
extension_lang_map = {
"." + ext: language
for language, extensions in language_extensions.items()
for ext in extensions.split()
}

language_ids = set(language_extensions.keys())
# Module-level state, filled in by load_language_modes() from the settings CSV.
# Maps "." + extension to the language whose row lists that extension.
extension_lang_map = None

# Set of language names read from the CSV.
language_ids = None
# Maps each language name to the list of extensions parsed from its row.
language_extensions = None

# Name of the CSV file holding the language mode settings, and its resolved path.
SETTINGS_FILENAME = "language_modes.csv"
settings_filepath = compute_csv_path(SETTINGS_FILENAME)

# Header row expected in (and written to) the settings CSV.
LANGUAGE_HEADERS = ["language", "extensions", "spoken_forms"]


def make_sure_settings_file_exists():
    """Create the language modes CSV from built-in defaults if it is missing.

    Each default entry is ``[language, extensions]`` or
    ``[language, extensions, spoken_forms]``, where ``extensions`` and
    ``spoken_forms`` are tuples of strings. Entries without spoken forms are
    spoken using the language name itself.
    """
    # Only languages with a supported language mode are enabled here; that's
    # why there are so many commented-out entries.
    default_csv_contents = [
        # ["assembly", ("asm", "s")],
        # ["bash", ("bashbook", "sh")],
        ["batch", ("bat",)],
        ["c", ("c", "h")],
        # ["cmake", ("cmake",)],
        # ["cplusplus", ("cpp", "hpp"), ("see plus plus",)],
        ["csharp", ("cs",), ("see sharp",)],
        ["css", ("css",), ("c s s",)],
        # ["elisp", ("el",)],
        # ["elm", ("elm",)],
        ["gdb", ("gdb",), ("g d b",)],
        ["go", ("go",), ("go", "go lang", "go language")],
        ["java", ("java",)],
        ["javascript", ("js",)],
        ["javascriptreact", ("jsx",)],
        # ["json", ("json",)],
        ["kotlin", ("kt",)],
        ["lua", ("lua",)],
        ["markdown", ("md",)],
        # ["perl", ("pl",)],
        ["php", ("php",)],
        # ["powershell", ("ps1",)],
        ["python", ("py",)],
        ["protobuf", ("proto",)],
        # One-element tuples need the trailing comma; ("r") is just the string "r".
        ["r", ("r",), ("are language",)],
        # ["racket", ("rkt",)],
        ["ruby", ("rb",)],
        ["rust", ("rs",)],
        ["scala", ("scala",)],
        ["scss", ("scss",)],
        # ["snippets", ("snippets",)],
        ["sql", ("sql",)],
        ["stata", ("do", "ado")],
        ["talon", ("talon",)],
        ["talonlist", ("talon-list",)],
        ["terraform", ("tf",)],
        ["tex", ("tex",), ("tech", "lay tech", "latex")],
        ["typescript", ("ts",)],
        ["typescriptreact", ("tsx",)],
        # ["vba", ("vba",)],
        ["vimscript", ("vim", "vimrc")],
        # html doesn't actually have a language mode, but we do have snippets.
        ["html", ("html",)],
    ]
    create_three_columns_csv_from_default_if_nonexistent(
        SETTINGS_FILENAME, LANGUAGE_HEADERS, default_csv_contents
    )


make_sure_settings_file_exists()


@resource.watch(settings_filepath)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this handle if the file doesn't exist?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Last time I tested the code, it created the file if it did not exist.

def load_language_modes(path: str):
    """Reload the language mode settings and rebuild everything derived from them.

    Makes sure the settings CSV exists, reads it, and then updates, in order:
    the ``language_extensions`` global, the ``self.language_mode`` context
    list, the ``extension_lang_map`` global, and the ``language_ids`` global.

    NOTE(review): ``path`` is not read by this function; the file is always
    located through ``SETTINGS_FILENAME``. Presumably the argument exists to
    satisfy the ``resource.watch`` callback signature -- confirm.
    """
    global language_extensions, extension_lang_map, language_ids

    make_sure_settings_file_exists()
    extensions_by_language, spoken_forms_by_language = (
        get_key_value_pairs_and_spoken_forms_from_three_column_csv(
            SETTINGS_FILENAME,
            LANGUAGE_HEADERS,
        )
    )
    language_extensions = extensions_by_language

    ctx.lists["self.language_mode"] = compute_spoken_form_to_key_dictionary(
        language_extensions, spoken_forms_by_language
    )

    # Build the map locally so the global is replaced in a single assignment.
    lang_by_extension = {}
    for language, extensions in language_extensions.items():
        for ext in extensions:
            lang_by_extension["." + ext] = language
    extension_lang_map = lang_by_extension

    language_ids = set(language_extensions.keys())


load_language_modes(settings_filepath)

forced_language = ""

Expand Down
130 changes: 130 additions & 0 deletions core/user_settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import io
import os
from pathlib import Path

Expand Down Expand Up @@ -75,3 +76,132 @@ def append_to_csv(filename: str, rows: dict[str, str]):
writer.writerow([])
for key, value in rows.items():
writer.writerow([key] if key == value else [value, key])


def get_key_value_pairs_and_spoken_forms_from_three_column_csv(
    filename: str, headers: tuple[str, str, str]
):
    """Read a settings CSV of the form name,values,spoken_forms.

    Args:
        filename: Name of the CSV file; resolved with ``compute_csv_path``.
        headers: The header row the file is expected to start with.

    Returns:
        A pair ``(key_value_pairs, spoken_forms)``: two dictionaries keyed by
        name, holding the parsed values and the parsed spoken forms.
    """
    csv_rows = _obtain_rows_from_csv(compute_csv_path(filename))
    return _convert_rows_from_file_with_headers_to_key_value_pairs_and_spoken_forms(
        csv_rows, filename, headers
    )


def create_three_columns_csv_from_default_if_nonexistent(
    filename: str,
    headers: tuple[str, str, str],
    default: list[list[str, tuple[str], tuple[str]]],
):
    """Write the default three-column CSV unless the file is already there.

    Args:
        filename: Name of the CSV file; resolved with ``compute_csv_path``.
        headers: Header row written as the first line of a new file.
        default: Default entries, each ``[name, values]`` or
            ``[name, values, spoken_forms]``.
    """
    target = compute_csv_path(filename)
    if target.is_file():
        # An existing file is left untouched.
        return
    _create_three_columns_csv_from_default(target, headers, default)


def _create_three_columns_csv_from_default(path, headers, default):
    """Write ``headers`` followed by one CSV row per entry of ``default`` to ``path``.

    The file is opened for writing as UTF-8, replacing any existing contents.
    """
    with open(path, "w", encoding="utf-8", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(headers)
        csv_writer.writerows(
            _compute_row_for_three_column_csv(entry) for entry in default
        )


def _compute_row_for_three_column_csv(input_tuple):
    """Turn a default entry into the list of cells for one CSV row.

    ``input_tuple`` is either ``(name, values, spoken_forms)`` or
    ``(name, values)``. The result is ``[name, packed_values]``, with the
    packed spoken forms appended only when spoken forms are present and
    non-empty.
    """
    spoken_forms = None
    if len(input_tuple) == 3:
        name, values, spoken_forms = input_tuple
    else:
        # Anything that is not a triple must unpack as a pair.
        name, values = input_tuple

    cells = [name, _compute_values_packed_into_column(values)]
    if spoken_forms:
        cells.append(_compute_values_packed_into_column(spoken_forms))
    return cells


def _compute_values_packed_into_column(values):
    """Pack several values into the text of a single CSV cell.

    The values are joined with semicolons using the csv module, so a value
    that itself contains a semicolon is quoted.

    Args:
        values: An iterable of strings, or a single string.

    Returns:
        The packed text with surrounding whitespace (including the csv line
        terminator) stripped.
    """
    if isinstance(values, str):
        # A bare string is iterable, so csv would write one field per
        # character ("cpp" -> "c;p;p"). Treat it as a single value instead.
        values = (values,)
    buffer = io.StringIO()
    csv.writer(buffer, delimiter=";").writerow(values)
    return buffer.getvalue().strip()


def _obtain_rows_from_csv(path):
    """Read the CSV file at ``path`` and return all of its rows as lists of strings.

    The file is decoded as UTF-8 to match the encoding used when the default
    file is written, instead of relying on the platform's default encoding.
    """
    with open(str(path), "r", encoding="utf-8", newline="") as f:
        return list(csv.reader(f))


def _convert_rows_from_file_with_headers_to_key_value_pairs_and_spoken_forms(
    rows, filename, headers
):
    """Convert raw CSV rows into ``(key_value_pairs, spoken_forms)`` dictionaries.

    The first row is treated as the header row and checked against
    ``headers``; every later row contributes ``name -> values`` and, when its
    third column is non-empty, ``name -> spoken forms``. With fewer than two
    rows both dictionaries are empty and the headers are not checked.

    Args:
        rows: Rows as read from the CSV, header row first.
        filename: File name, used only in printed diagnostics.
        headers: Expected header row.
    """
    key_value_pairs = {}
    spoken_forms = {}
    if len(rows) < 2:
        return key_value_pairs, spoken_forms

    _complain_if_invalid_headers_found_in_file(rows, headers, filename)
    for row in rows[1:]:
        column_count = len(row)
        if column_count == 0:
            # Blank lines come back as empty rows (the original author notes
            # this happens with Windows newlines); skip them silently.
            continue
        if column_count == 1:
            print(f"{filename}: Ignoring row with only one value: {row}.")
            continue
        if column_count > 3:
            print(
                f'"{filename}": More than three values in row: {row}.'
                + " Ignoring the extras."
            )

        name = row[0].strip()
        values_text = row[1]
        # The spoken forms column is optional.
        new_spoken_forms_text = row[2] if column_count >= 3 else ""

        key_value_pairs[name] = _get_intermediate_values_from_column(values_text)
        if new_spoken_forms_text:
            spoken_forms[name] = _get_intermediate_values_from_column(
                new_spoken_forms_text
            )
    return key_value_pairs, spoken_forms


def _get_intermediate_values_from_column(values_text):
    """Split the text of one CSV cell into its semicolon-separated values.

    Each value is stripped of surrounding whitespace. Values that are empty
    after stripping (from a trailing or doubled semicolon, or a
    whitespace-only cell) are dropped so they cannot turn into an empty
    extension or an empty spoken form.

    Returns:
        A list of non-empty strings; empty when the cell holds no values.
    """
    parsed = next(csv.reader([values_text], delimiter=";"), [])
    stripped = (value.strip() for value in parsed)
    return [value for value in stripped if value]


def _complain_if_invalid_headers_found_in_file(rows, expected_headers, filename):
    """Print a warning when the first row does not equal the expected headers.

    Nothing is raised or returned; the comparison is an exact list equality
    between ``rows[0]`` and ``list(expected_headers)``.
    """
    actual_headers = rows[0]
    if actual_headers != list(expected_headers):
        print(
            f'"{filename}": Malformed headers - {actual_headers}.'
            + f" Should be {list(expected_headers)}. Ignoring row."
        )


def compute_csv_path(filename: str):
    """Return the path of ``filename`` inside ``SETTINGS_DIR``.

    Asserts that ``filename`` ends with ".csv".
    """
    csv_path = SETTINGS_DIR / filename
    assert filename.endswith(".csv")
    return csv_path


def compute_spoken_form_to_key_dictionary(key_value_pairs, spoken_forms):
    """Build a dictionary mapping each spoken form to the key it refers to.

    Args:
        key_value_pairs: Mapping whose keys are the names to be spoken.
        spoken_forms: Mapping from key to a list of spoken forms; may be
            empty or None. A key without an entry is spoken as itself.

    Returns:
        A dictionary from spoken form to key. When two keys share a spoken
        form, the key that comes later in ``key_value_pairs`` wins.
    """
    if not spoken_forms:
        # No overrides at all: every key is its own spoken form.
        return {key: key for key in key_value_pairs}

    spoken_to_key = {}
    for key in key_value_pairs:
        for name in spoken_forms.get(key, [key]):
            spoken_to_key[name] = key
    return spoken_to_key