CI: Update ruff and cruft template #291

Merged · 4 commits · Dec 26, 2024
2 changes: 1 addition & 1 deletion .cruft.json
@@ -1,6 +1,6 @@
 {
   "template": "https://github.com/KennethEnevoldsen/swift-python-cookiecutter",
-  "commit": "e02068889310225ea4f65ea0b203c2949e1597a9",
+  "commit": "85413085032f305896da8bad287a83d53fb0b196",
   "checkout": null,
   "context": {
     "cookiecutter": {
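Note: .cruft.json pins the cookiecutter template repository and the commit the project was last synced against, so this bump records a re-sync with the upstream template (typically produced by running `cruft update`).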
7 changes: 7 additions & 0 deletions .github/workflows/static_type_checks.yml
@@ -1,3 +1,10 @@
+# THIS ACTION WILL:
+# 1. Install dependencies
+# 2. Run static type checker
+
+# SETUP:
+# None required except for the Makefile
+
 name: static_type_checks
 
 on:
12 changes: 7 additions & 5 deletions .github/workflows/tests.yml
@@ -1,17 +1,19 @@
-# This workflow will:
-# 1) install Python dependencies
-# 2) run make test
+# THIS ACTION WILL:
+# 1) install Python dependencies
+# 2) run make test
+
+# SETUP:
+# None required except for the Makefile
 
-name: Tests
+name: test
 on:
   push:
     branches: [main]
   pull_request:
     branches: [main]
 
 jobs:
-  pytest:
+  test:
     runs-on: ${{ matrix.os }}
     permissions:
       contents: read
16 changes: 9 additions & 7 deletions docs/evaluation/datasets.py
@@ -1,5 +1,7 @@
+from __future__ import annotations
+
 import random
-from typing import Any, Dict, List
+from typing import Any
 
 import augmenty
 import catalogue
@@ -15,7 +17,7 @@
 
 
 @datasets.register("dane")
-def dane() -> Dict[str, List[Example]]:
+def dane() -> dict[str, list[Example]]:
     from dacy.datasets import dane as _dane
 
     train, dev, test = _dane(splits=["train", "dev", "test"])  # type: ignore
@@ -34,7 +36,7 @@ def augment_dataset(
     augmenters: dict,
     n_rep: int = 20,
     split: str = "test",
-) -> List[Example]:
+) -> list[Example]:
     # ensure seed
     random.seed(42)
     np.random.seed(42)
@@ -63,25 +65,25 @@ def augment_dataset(
 
 
 @datasets.register("gender_bias_dane")
-def dane_gender_bias() -> Dict[str, List[Example]]:
+def dane_gender_bias() -> dict[str, list[Example]]:
     return {"test": augment_dataset("dane", augmenters=get_gender_bias_augmenters())}
 
 
 @datasets.register("robustness_dane")
-def dane_robustness() -> Dict[str, List[Example]]:
+def dane_robustness() -> dict[str, list[Example]]:
     return {"test": augment_dataset("dane", augmenters=get_robustness_augmenters())}
 
 
 @datasets.register("dansk")
-def dansk(**kwargs: Any) -> Dict[str, List[Example]]:
+def dansk(**kwargs: Any) -> dict[str, list[Example]]:
     splits = ["train", "dev", "test"]
 
     if not Doc.has_extension("meta"):
         Doc.set_extension("meta", default={}, force=True)
 
     nlp = spacy.blank("da")
 
-    def convert_to_doc(example: Dict) -> Doc:
+    def convert_to_doc(example: dict) -> Doc:
         doc = Doc(nlp.vocab).from_json(example)
         # set metadata
         for k in ["dagw_source", "dagw_domain", "dagw_source_full"]:
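Note: the added `from __future__ import annotations` is what lets the builtin generics above replace `typing.Dict`/`typing.List` even on interpreters older than the ones that evaluate them natively: it turns every annotation into a string that is never evaluated at runtime. A minimal, self-contained sketch of the pattern (illustrative code, not part of this PR):

from __future__ import annotations


def split_examples(examples: list[str]) -> dict[str, list[str]]:
    # Group examples by prefix (a toy stand-in for dataset splits).
    groups: dict[str, list[str]] = {}
    for ex in examples:
        groups.setdefault(ex.split("-")[0], []).append(ex)
    return groups


print(split_examples(["train-1", "train-2", "dev-1", "test-1"]))
# {'train': ['train-1', 'train-2'], 'dev': ['dev-1'], 'test': ['test-1']}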
14 changes: 7 additions & 7 deletions docs/evaluation/utils.py
@@ -1,7 +1,7 @@
 import json
 import random
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Optional
 
 import numpy as np
 import pandas as pd
@@ -15,11 +15,11 @@
 
 
 def bootstrap(
-    examples: List[Example],
+    examples: list[Example],
     n_rep: int = 100,
     n_samples: Optional[int] = None,
     getter: Optional[Callable] = None,
-) -> List[Dict[str, Any]]:
+) -> list[dict[str, Any]]:
     random.seed(42)
     scorer = Scorer()
     scores = []
@@ -35,7 +35,7 @@ def bootstrap(
     return scores
 
 
-def compute_mean_and_ci(scores: List[Dict[str, Any]]) -> Dict[str, Any]:
+def compute_mean_and_ci(scores: list[dict[str, Any]]) -> dict[str, Any]:
     ent_f = [score["ents_f"] for score in scores]
     # filter out None
     ent_f = [x for x in ent_f if x is not None]
@@ -116,7 +116,7 @@ def doc_from_json(json_obj: dict, nlp: Language) -> Doc:
 
 def predictions_to_disk(
     save_path: Path,
-    examples: List[Example],
+    examples: list[Example],
     mdl_name: str,
     time_in_seconds: float,
 ) -> dict:
@@ -199,7 +199,7 @@ def apply_models(
 
 
 def create_dataframe(
-    examples: List[Example],
+    examples: list[Example],
     mdl_name: str,
     decimals: int = 1,
     n_rep: int = 100,
@@ -212,7 +212,7 @@
         "Models": mdl_name,
     }
 
-    def score_to_string(score: Dict[str, Any], decimals: int = 1) -> str:
+    def score_to_string(score: dict[str, Any], decimals: int = 1) -> str:
        if score["mean"] == 0:
            return " "
        return f"{100*score['mean']:.{decimals}f} ({100*score['ci'][0]:.{decimals}f}, {100*score['ci'][1]:.{decimals}f})"
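Note: the `bootstrap`/`compute_mean_and_ci` pair follows the standard resample-then-summarize pattern, and `score_to_string` consumes the resulting `{"mean": ..., "ci": ...}` dict. A minimal sketch of that pattern, assuming a 95% percentile confidence interval (the function bodies are not shown in this diff, so treat the details as illustrative):

import random
from typing import Any

import numpy as np


def bootstrap_means(values: list[float], n_rep: int = 100) -> list[float]:
    # Resample with replacement and record the mean of each pseudo-sample.
    random.seed(42)
    means = []
    for _ in range(n_rep):
        sample = random.choices(values, k=len(values))
        means.append(sum(sample) / len(sample))
    return means


def mean_and_ci(scores: list[float]) -> dict[str, Any]:
    # Summarize bootstrap replicates as a mean plus a 95% percentile interval,
    # i.e. the {"mean": ..., "ci": ...} shape that score_to_string expects.
    lower, upper = np.percentile(scores, [2.5, 97.5])
    return {"mean": float(np.mean(scores)), "ci": (float(lower), float(upper))}


print(mean_and_ci(bootstrap_means([0.81, 0.84, 0.79, 0.88, 0.85])))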
6 changes: 3 additions & 3 deletions makefile
@@ -9,9 +9,9 @@ static-type-check:
 lint:
 	@echo "--- 🧹 Running linters ---"
 	ruff format .  # running ruff formatting
-	ruff src/ --fix  # running ruff linting
-	ruff tests/ --fix
-	ruff docs/conf.py --fix
+	ruff check src/ --fix  # running ruff linting
+	ruff check tests/ --fix
+	ruff check docs/conf.py --fix
 
 test:
 	@echo "--- 🧪 Running tests ---"
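Context for this hunk: newer ruff releases split formatting and linting into explicit subcommands; the bare `ruff <path>` invocation was deprecated in favor of `ruff check <path>` and has since been removed, so the `ruff==0.7.1` pin introduced in pyproject.toml requires the `ruff check` spelling used here.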
@@ -1,5 +1,5 @@
 ### pip install daluke==0.0.5
-from typing import Iterable, List
+from typing import Iterable
 
 from daluke import AutoNERDaLUKE, predict_ner
 from spacy.lang.da import Danish
@@ -18,7 +18,7 @@ def apply_daluke(
     examples: Iterable[Example],
     use_spacy: bool = True,
     batch_size: int = 16,
-) -> List[Example]:
+) -> list[Example]:
     docs_y, sentences = list(), list()
     for example in examples:
         # Tokenization using spacy or nltk
@@ -1,6 +1,6 @@
 # to download the danlp and nerda you will have to set up a certificate:
 import ssl
-from typing import Iterable, List
+from typing import Iterable
 
 from NERDA.precooked import DA_BERT_ML
 from spacy.lang.da import Danish
@@ -17,7 +17,7 @@
 nlp_da = Danish()
 
 
-def apply_nerda(examples: Iterable[Example], use_spacy: bool = True) -> List[Example]:
+def apply_nerda(examples: Iterable[Example], use_spacy: bool = True) -> list[Example]:
     sentences = []
     docs_y = []
     for example in examples:
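Note: the `ssl` import here backs the certificate workaround the top comment alludes to; the usual incantation (shown as an illustration, not code from this repo) disables certificate verification for the model downloads, so it should only be used for trusted hosts:

import ssl

# Allow unverified HTTPS downloads (use only for trusted model hosts).
ssl._create_default_https_context = ssl._create_unverified_context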
@@ -1,4 +1,4 @@
-from typing import Callable, Iterable, List
+from typing import Callable, Iterable
 
 from spacy.tokens import Doc, Span
 from spacy.training import Example
@@ -12,12 +12,12 @@ def no_misc_getter(doc, attr):
         yield span
 
 
-def add_iob(doc: Doc, iob: List[str]) -> Doc:
+def add_iob(doc: Doc, iob: list[str]) -> Doc:
     """Add IOB tags to Doc.
 
     Args:
         doc (Doc): A SpaCy doc
-        iob (List[str]): a list of tokens in the IOB format
+        iob (list[str]): a list of tokens in the IOB format
 
     Returns:
         Doc: A doc with the spans set to the new IOB
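For readers unfamiliar with the helper: `add_iob` turns token-level IOB tags into entity spans on the doc. A sketch of what such a helper can look like with spaCy's own conversion utilities (assumed behavior; the function body is not shown in this diff):

import spacy
from spacy.tokens import Doc
from spacy.training import biluo_tags_to_spans, iob_to_biluo


def add_iob(doc: Doc, iob: list[str]) -> Doc:
    # Convert one IOB tag per token into entity spans and attach them.
    doc.ents = biluo_tags_to_spans(doc, iob_to_biluo(iob))
    return doc


nlp = spacy.blank("da")
doc = add_iob(nlp("Jens bor i København"), ["B-PER", "O", "O", "B-LOC"])
print([(ent.text, ent.label_) for ent in doc.ents])
# [('Jens', 'PER'), ('København', 'LOC')]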
81 changes: 40 additions & 41 deletions pyproject.toml
@@ -48,11 +48,7 @@ repository = "https://github.com/centre-for-humanities-computing/DaCy"
 file = "LICENSE"
 name = "Apache License 2.0"
 [project.optional-dependencies]
-dev = [
-    "cruft>=2.0.0",
-    "pyright>=1.1.339",
-    "ruff>=0.0.270",
-]
+dev = ["cruft>=2.0.0", "pyright>=1.1.339", "ruff==0.7.1"]
 tests = ["pytest>=7.1.2", "pytest-cov>=3.0.0", "pytest-instafail>=0.4.2"]
 docs = [
     "sphinx==5.3.0",
@@ -110,6 +106,40 @@ pythonPlatform = "Darwin"
 
 [tool.ruff]
 # extend-include = ["*.ipynb"]
+
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".hg",
+    ".nox",
+    ".pants.d",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "venv",
+    "__init__.py",
+    ".env",
+    "__pycache__",
+    "dev/**",
+    "training/main/**",
+    "training/ner_fine_grained/**",
+    "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
+    "docs/performance_testing_utils/**",
+]
+target-version = "py39"
+
+[tool.ruff.lint]
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
 select = [
     "A",
@@ -150,54 +180,23 @@ ignore = [
     "ANN202",
     "COM812",
 ]
-ignore-init-module-imports = true
 # Allow autofix for all enabled rules (when `--fix` is provided).
 unfixable = ["ERA"]
-# Exclude a variety of commonly ignored directories.
-exclude = [
-    ".bzr",
-    ".direnv",
-    ".eggs",
-    ".git",
-    ".hg",
-    ".nox",
-    ".pants.d",
-    ".pytype",
-    ".ruff_cache",
-    ".svn",
-    ".tox",
-    ".venv",
-    "__pypackages__",
-    "_build",
-    "buck-out",
-    "build",
-    "dist",
-    "node_modules",
-    "venv",
-    "__init__.py",
-    ".venv",
-    ".env",
-    ".git",
-    "__pycache__",
-    "dev/**",
-    "training/main/**",
-    "training/ner_fine_grained/**",
-    "papers/DaCy-A-Unified-Framework-for-Danish-NLP/**",
-    "docs/performance_testing_utils/**",
-]
 
 
 # Allow unused variables when underscore-prefixed.
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-target-version = "py38"
 
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 
-[tool.ruff.flake8-annotations]
+[tool.ruff.lint.flake8-annotations]
 mypy-init-return = true
 suppress-none-returning = true
 
 
-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10
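The restructuring above tracks two changes in recent ruff releases: lint-specific settings (including the `flake8-annotations` and `mccabe` sections) moved from the top-level `[tool.ruff]` table into the `[tool.ruff.lint]` namespace, and `ignore-init-module-imports` was deprecated, which is presumably why it is dropped here while `__init__.py` stays in the `exclude` list. The `target-version` bump from py38 to py39 matches the builtin-generics cleanup in the Python files, and top-level settings like `exclude` and `target-version` remain directly under `[tool.ruff]`.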
10 changes: 5 additions & 5 deletions src/dacy/datasets/dane.py
@@ -5,7 +5,7 @@
 import sys
 from os import PathLike
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 from spacy.training.corpus import Corpus
 
@@ -15,19 +15,19 @@
 
 def dane(  # noqa
     save_path: Optional[PathLike] = None,  # type: ignore
-    splits: List[str] = ["train", "dev", "test"],  # noqa # type: ignore
+    splits: list[str] = ["train", "dev", "test"],  # noqa # type: ignore
     redownload: bool = False,
     n_sents: int = 1,
     open_unverified_connection: bool = False,
     **kwargs,  # noqa
-) -> Union[List[Corpus], Corpus]:  # type: ignore
+) -> Union[list[Corpus], Corpus]:  # type: ignore
     """Reads the DaNE dataset as a spaCy Corpus.
 
     Args:
         save_path (str, optional): Path to the DaNE dataset. If it does not contain
             the dataset, it is downloaded to the folder. Defaults to None,
             corresponding to dacy.where_is_my_dacy() in the datasets subfolder.
-        splits (List[str], optional): Which splits of the dataset should be returned.
+        splits (list[str], optional): Which splits of the dataset should be returned.
             Possible options include "train", "dev", "test", "all". Defaults to
             ["train", "dev", "test"].
         redownload (bool, optional): Should the dataset be redownloaded. Defaults to
@@ -40,7 +40,7 @@ def dane(  # noqa
             whether it already exists. Defaults to False.
 
     Returns:
-        Union[List[Corpus], Corpus]: Returns a SpaCy corpus or a list thereof.
+        Union[list[Corpus], Corpus]: Returns a SpaCy corpus or a list thereof.
 
     Example:
         >>> from dacy.datasets import dane
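One detail worth noting in this file: `Optional` and `Union` survive the cleanup while `List` does not. PEP 585 builtin generics such as `list[Corpus]` evaluate at runtime from Python 3.9 onward (matching the new `target-version = "py39"`), but the PEP 604 union spelling `Corpus | list[Corpus]` only works at runtime from Python 3.10. A small self-contained illustration (hypothetical code, not from the repository):

from typing import Optional, Union


# Fine on Python 3.9: builtin generics inside typing constructs.
def load(splits: Optional[list[str]] = None) -> Union[list[str], str]:
    splits = splits if splits is not None else ["train", "dev", "test"]
    # Mirror the dane() convention: a single split is returned unwrapped.
    return splits if len(splits) > 1 else splits[0]


print(load(["train"]))  # -> 'train'
print(load())           # -> ['train', 'dev', 'test']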