Commit
Remove the toxicity demos from the LIT examples.
PiperOrigin-RevId: 640521996
llcourage authored and LIT team committed Jun 5, 2024
1 parent 6aa2eb6 commit c2fb41b
Showing 5 changed files with 1 addition and 137 deletions.
32 changes: 0 additions & 32 deletions lit_nlp/examples/datasets/classification.py
@@ -187,35 +187,3 @@ def spec(self) -> lit_types.Spec:
"label": lit_types.CategoryLabel(vocab=self.LABELS),
}


class ToxicityData(lit_dataset.Dataset):
"""Jigsaw toxicity dataset; see https://www.tensorflow.org/datasets/catalog/wikipedia_toxicity_subtypes."""

LABELS = ["non-toxic", "toxic"]

def __init__(self, split="test", max_seq_len=500):
"""Dataset constructor, loads the data into memory."""
raw_examples = load_tfds("wikipedia_toxicity_subtypes", split=split)
self._examples = [] # populate this with data records
for record in raw_examples:
self._examples.append({
"sentence": record["text"].decode("utf-8"),
"label": self.LABELS[int(record["toxicity"])],
"identity_attack": bool(int(record["identity_attack"])),
"insult": bool(int(record["insult"])),
"obscene": bool(int(record["obscene"])),
"severe_toxicity": bool(int(record["severe_toxicity"])),
"threat": bool(int(record["threat"]))
})

def spec(self) -> lit_types.Spec:
"""Dataset spec, which should match the model"s input_spec()."""
return {
"sentence": lit_types.TextSegment(),
"label": lit_types.CategoryLabel(vocab=self.LABELS),
"identity_attack": lit_types.Boolean(),
"insult": lit_types.Boolean(),
"obscene": lit_types.Boolean(),
"severe_toxicity": lit_types.Boolean(),
"threat": lit_types.Boolean()
}
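
For context, a minimal usage sketch for the dataset class deleted above, as it could be loaded before this commit. Illustrative only, assuming lit-nlp and tensorflow-datasets are installed and the TFDS data can be downloaded:

```python
# Illustrative sketch, not part of this commit: loading the removed dataset.
from lit_nlp.examples.datasets import classification

dataset = classification.ToxicityData(split="test")
print(len(dataset.examples))         # number of records loaded from TFDS
print(dataset.examples[0]["label"])  # "non-toxic" or "toxic"
```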
22 changes: 0 additions & 22 deletions lit_nlp/examples/models/glue_models.py
@@ -648,25 +648,3 @@ def input_spec(self):
     ret = super().input_spec()
     ret[self.config.label_name] = lit_types.Scalar(min_val=0, max_val=5)
     return ret
-
-
-class ToxicityModel(GlueModel):
-  """Classification model on Jigsaw Toxicity Dataset."""
-
-  def __init__(self, *args, **kw):
-    super().__init__(
-        *args,
-        text_a_name="sentence",
-        text_b_name=None,
-        labels=["non-toxic", "toxic"],
-        null_label_idx=0,
-        **kw)
-
-  def output_spec(self) -> Spec:
-    ret = super().output_spec()
-    ret["probas"] = lit_types.MulticlassPreds(
-        parent=self.config.label_name,
-        vocab=self.config.labels,
-        null_idx=self.config.null_label_idx,
-        threshold=0.3)
-    return ret
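
Note that the deleted model lowered the positive-class decision threshold from the usual 0.5 to 0.3, which flags more borderline comments as "toxic" (favoring recall over precision). A hypothetical sketch of inspecting that spec before this commit; the checkpoint path is a placeholder:

```python
# Illustrative sketch, not part of this commit. "path/to/checkpoint" is a
# placeholder for a fine-tuned toxicity checkpoint.
from lit_nlp.examples.models import glue_models

model = glue_models.ToxicityModel("path/to/checkpoint")
probas = model.output_spec()["probas"]
print(probas.vocab)      # ["non-toxic", "toxic"]
print(probas.null_idx)   # 0: "non-toxic" is the designated negative class
print(probas.threshold)  # 0.3 decision threshold for predicting "toxic"
```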
5 changes: 0 additions & 5 deletions lit_nlp/examples/tools/glue_trainer.py
@@ -25,7 +25,6 @@
 from absl import flags
 from absl import logging
 
-from lit_nlp.examples.datasets import classification
 from lit_nlp.examples.datasets import glue
 from lit_nlp.examples.models import glue_models
 from lit_nlp.lib import serialize
@@ -130,10 +129,6 @@ def main(argv: Sequence[str]) -> None:
     train_data = glue.STSBData("train")
     val_data = glue.STSBData("validation")
     model = glue_models.STSBModel(_ENCODER_NAME.value)
-  elif _TASK.value == "toxicity":
-    train_data = classification.ToxicityData("train")
-    val_data = classification.ToxicityData("test")
-    model = glue_models.ToxicityModel(_ENCODER_NAME.value)
   else:
     raise ValueError(f"Unrecognized task name: '{_TASK.value:s}'")

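With this branch gone, `--task=toxicity` now falls through to the `ValueError` below it. For reference, a sketch of the dispatch pattern the trainer keeps, shown with the remaining SST-2 task (the `build_task` helper is illustrative, not a LIT API):

```python
# Illustrative sketch of glue_trainer's per-task dispatch; build_task is a
# made-up helper name, but SST2Data/SST2Model are real remaining wrappers.
from lit_nlp.examples.datasets import glue
from lit_nlp.examples.models import glue_models


def build_task(task: str, encoder_name: str):
  """Returns (train_data, val_data, model) for a supported task."""
  if task == "sst2":
    return (glue.SST2Data("train"),
            glue.SST2Data("validation"),
            glue_models.SST2Model(encoder_name))
  # The "toxicity" branch removed above used to sit here.
  raise ValueError(f"Unrecognized task name: '{task:s}'")
```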
75 changes: 0 additions & 75 deletions lit_nlp/examples/toxicity_demo.py

This file was deleted.

4 changes: 1 addition & 3 deletions website/sphinx_src/components.md
@@ -116,9 +116,7 @@ implemented with the `MulticlassPreds` and `CategoryLabel` types.
 *   A negative class can be designated using the `null_idx` attribute of
     `MulticlassPreds` (most commonly, `null_idx=0`), and metrics such as
     precision, recall, F1 will be computed for the remaining classes. AUC and
-    AUCPR will be computed for binary classification tasks. For an example, see
-    the
-    [comment toxicity model](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/models/glue_models.py?l=518&rcl=386779180).
+    AUCPR will be computed for binary classification tasks.
 *   If `null_idx` is set and there is only one other class, the other class
     (often, class `1`) is treated as a positive class, and the LIT UI can be
     used to change the classification threshold. If `null_idx` is set and there
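The documentation edit above only drops the stale example link; the `null_idx` behavior it describes is unchanged. A minimal spec sketch of that setup (field names are illustrative):

```python
# Illustrative sketch: a binary classifier's output spec where class 0 is
# the designated negative class, so precision/recall/F1 target class 1 and
# the LIT UI can adjust the decision threshold.
from lit_nlp.api import types as lit_types

output_spec = {
    "probas": lit_types.MulticlassPreds(
        parent="label",                # dataset field holding gold labels
        vocab=["non-toxic", "toxic"],  # class 1 is the positive class
        null_idx=0,                    # class 0 is treated as "negative"
    ),
}
```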
