-
-
Notifications
You must be signed in to change notification settings - Fork 157
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(citations): add UnmatchedCitation model and logic
Solves #4920
- Add new model UnmatchedCitation to the citations app
- Refactor cl.search.models.Citation to create a BaseCitation abstract model that is reused by the UnmatchedCitation model
- Update cl.citations.tasks.store_opinion_citations_and_update_parentheticals to handle storing and updating unmatched citations
- Update cl.search.signals to update UnmatchedCitation status when a new Citation is saved
- Add tests
- Add update_unmatched_citations command to trigger update for found citations
- Loading branch information
Showing
9 changed files
with
513 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
cl/citations/management/commands/update_unmatched_citations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from cl.citations.management.commands import find_citations | ||
from cl.citations.models import UnmatchedCitation | ||
from cl.lib.command_utils import VerboseCommand | ||
|
||
|
||
class Command(find_citations.Command):
    """Re-run find_citations_and_parentheticals_for_opinion_by_pks for
    opinions where unmatched citations have been found

    Only argument parsing and the selection of opinions are specific to this
    command; enqueuing and progress logging are borrowed from
    find_citations.Command.update_documents.
    """

    help = "Try to resolve unmatched citations"
    # variables to use find_citations.Command.update_documents
    # These class-level values are only defaults; `handle` rebinds the
    # per-run state on the instance so the mutable `timings` list is never
    # shared between Command instances.
    count = 0
    average_per_s = 0.0
    timings: list[float] = []

    def add_arguments(self, parser):
        """Register the command line options of this command

        VerboseCommand.add_arguments is called explicitly (instead of
        `super()`), so only its options plus the two declared below are
        registered on the parser.

        :param parser: the argument parser handed over by Django
        """
        VerboseCommand.add_arguments(self, parser)
        parser.add_argument(
            "--resolve-failures",
            action="store_true",
            default=False,
            help="Include citations with FAILED and FAILED_AMBIGUOUS status",
        )
        parser.add_argument(
            "--queue",
            default="batch1",
            help="The celery queue where the tasks should be processed.",
        )

    def handle(self, *args, **options):
        """Re-uses find_citations.Command enqueuer and logging

        Selects the opinions that cite at least one UnmatchedCitation with
        status FOUND (plus FAILED and FAILED_AMBIGUOUS when
        `--resolve-failures` is given) and hands their ids to
        find_citations.Command.update_documents.

        :param options: parsed command line options; `resolve_failures` and
        `queue` are read here
        """
        VerboseCommand.handle(self, *args, **options)

        # Start every run from a clean slate. Without this, the class-level
        # `timings` list would be mutated in place and shared by every
        # instance created in the same process.
        # NOTE(review): assumes update_documents updates `timings` and
        # `average_per_s` while logging progress - confirm
        self.average_per_s = 0.0
        self.timings = []

        status = [UnmatchedCitation.FOUND]
        if options["resolve_failures"]:
            status.extend(
                [UnmatchedCitation.FAILED, UnmatchedCitation.FAILED_AMBIGUOUS]
            )

        # distinct(*fields) requires the same fields to lead the .order_by()
        # clause, hence the explicit ordering on citing_opinion_id
        opinion_ids = (
            UnmatchedCitation.objects.filter(status__in=status)
            .order_by("citing_opinion_id")
            .distinct("citing_opinion_id")
        )
        # Number of distinct citing opinions that will be processed
        self.count = opinion_ids.count()
        opinion_pks = opinion_ids.values_list("citing_opinion_id", flat=True)
        find_citations.Command.update_documents(
            self, opinion_pks, options["queue"]
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
# Generated by Django 5.1.4 on 2025-01-21 03:45 | ||
|
||
import django.db.models.deletion | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Initial migration that creates the UnmatchedCitation model"""

    initial = True

    # The model below has a foreign key to search.opinion, so the search
    # app must be migrated up to this point first
    dependencies = [
        ("search", "0037_alter_citation_type_noop"),
    ]

    operations = [
        migrations.CreateModel(
            name="UnmatchedCitation",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "volume",
                    models.SmallIntegerField(
                        help_text="The volume of the reporter"
                    ),
                ),
                (
                    "reporter",
                    models.TextField(
                        db_index=True,
                        help_text="The abbreviation for the reporter",
                    ),
                ),
                (
                    "page",
                    models.TextField(
                        help_text="The 'page' of the citation in the reporter. Unfortunately, this is not an integer, but is a string-type because several jurisdictions do funny things with the so-called 'page'. For example, we have seen Roman numerals in Nebraska, 13301-M in Connecticut, and 144M in Montana."
                    ),
                ),
                (
                    "type",
                    models.SmallIntegerField(
                        choices=[
                            (1, "A federal reporter citation (e.g. 5 F. 55)"),
                            (
                                2,
                                "A citation in a state-based reporter (e.g. Alabama Reports)",
                            ),
                            (
                                3,
                                "A citation in a regional reporter (e.g. Atlantic Reporter)",
                            ),
                            (
                                4,
                                "A citation in a specialty reporter (e.g. Lawyers' Edition)",
                            ),
                            (
                                5,
                                "A citation in an early SCOTUS reporter (e.g. 5 Black. 55)",
                            ),
                            (
                                6,
                                "A citation in the Lexis system (e.g. 5 LEXIS 55)",
                            ),
                            (
                                7,
                                "A citation in the WestLaw system (e.g. 5 WL 55)",
                            ),
                            (8, "A vendor neutral citation (e.g. 2013 FL 1)"),
                            (
                                9,
                                "A law journal citation within a scholarly or professional legal periodical (e.g. 95 Yale L.J. 5; 72 Soc.Sec.Rep.Serv. 318)",
                            ),
                        ],
                        help_text="The type of citation that this is.",
                    ),
                ),
                (
                    # Resolution state of the citation; the integer values
                    # and their meaning are spelled out in the choices
                    "status",
                    models.SmallIntegerField(
                        choices=[
                            (
                                1,
                                "The citation does not exist in the search_citation table. We couldn't match the citation to a cluster on the previous citation extractor run",
                            ),
                            (
                                2,
                                "The citation exists on the search_citation table. We haven't updated the citing Opinion.html_with_citations yet",
                            ),
                            (
                                3,
                                "The citing Opinion.html_with_citations was updated successfully",
                            ),
                            (
                                4,
                                "The citing Opinion.html_with_citations update failed because the citation is ambiguous",
                            ),
                            (
                                5,
                                "The citing Opinion.html_with_citations update failed",
                            ),
                        ],
                        help_text="Status of resolution of the initially unmatched citation",
                    ),
                ),
                (
                    "citation_string",
                    models.TextField(
                        help_text="The unparsed citation string in case it doesn't match the regular citation model in BaseCitation"
                    ),
                ),
                (
                    "court_id",
                    models.TextField(
                        help_text="A court_id as identified by eyecite from the opinion's context. May be useful to know where to find missing citations"
                    ),
                ),
                (
                    "year",
                    models.TextField(
                        help_text="A year identified by eyecite from the opinion's context"
                    ),
                ),
                (
                    # Rows are deleted together with the citing opinion
                    # (on_delete=CASCADE)
                    "citing_opinion",
                    models.ForeignKey(
                        help_text="The opinion citing this citation",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="eyecite_citations",
                        to="search.opinion",
                    ),
                ),
            ],
            options={
                # Composite index over the three columns that identify a
                # citation
                "indexes": [
                    models.Index(
                        fields=["volume", "reporter", "page"],
                        name="citations_u_volume_da4d25_idx",
                    )
                ],
                # A given opinion can hold each (volume, reporter, page)
                # combination only once
                "unique_together": {
                    ("citing_opinion", "volume", "reporter", "page")
                },
            },
        ),
    ]
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
from django.db import models | ||
from eyecite.models import FullCaseCitation | ||
|
||
from cl.citations.utils import map_reporter_db_cite_type | ||
from cl.search.models import BaseCitation, Opinion | ||
|
||
|
||
class UnmatchedCitation(BaseCitation):
    """Keep track of citations that could not be resolved to a cluster on the
    batch citator run

    The `volume`, `reporter`, `page` and `type` fields used below are not
    declared in this class; they come from BaseCitation.
    """

    UNMATCHED = 1
    FOUND = 2
    RESOLVED = 3
    FAILED_AMBIGUOUS = 4
    FAILED = 5
    # Each label is assembled through implicit string concatenation: keep
    # exactly one space at every seam so the resulting text contains no
    # doubled spaces.
    STATUS = (
        (
            UNMATCHED,
            "The citation does not exist in the search_citation table. "
            "We couldn't match the citation to a cluster on the "
            "previous citation extractor run",
        ),
        (
            FOUND,
            "The citation exists on the search_citation table. "
            "We haven't updated the citing Opinion.html_with_citations yet",
        ),
        (
            RESOLVED,
            "The citing Opinion.html_with_citations was updated successfully",
        ),
        (
            FAILED_AMBIGUOUS,
            "The citing Opinion.html_with_citations update "
            "failed because the citation is ambiguous",
        ),
        (FAILED, "The citing Opinion.html_with_citations update failed"),
    )
    citing_opinion: models.ForeignKey = models.ForeignKey(
        Opinion,
        help_text="The opinion citing this citation",
        on_delete=models.CASCADE,
        related_name="eyecite_citations",
    )
    status: models.SmallIntegerField = models.SmallIntegerField(
        help_text="Status of resolution of the initially unmatched citation",
        choices=STATUS,
    )
    citation_string: models.TextField = models.TextField(
        help_text="The unparsed citation string in case it doesn't match the "
        "regular citation model in BaseCitation"
    )
    court_id: models.TextField = models.TextField(
        help_text="A court_id as identified by eyecite from the opinion's "
        "context. May be useful to know where to find missing citations"
    )
    year: models.TextField = models.TextField(
        help_text="A year identified by eyecite from the opinion's context"
    )

    class Meta:
        indexes = [
            models.Index(
                fields=["volume", "reporter", "page"],
            )
        ]
        # An opinion stores each unmatched (volume, reporter, page)
        # combination only once
        unique_together = (("citing_opinion", "volume", "reporter", "page"),)

    @classmethod
    def create_from_eyecite(
        cls, eyecite_citation: FullCaseCitation, citing_opinion: Opinion
    ) -> "UnmatchedCitation":
        """
        Create an UnmatchedCitation instance using an eyecite FullCaseCitation
        Saving is left to the caller
        :param eyecite_citation: a FullCaseCitation as returned by
        eyecite.get_citations
        :param citing_opinion: the opinion which uses the citation
        :return: an unsaved UnmatchedCitation with status UNMATCHED
        """
        # The citation type is derived from the first edition eyecite
        # associated with the citation
        cite_type_str = eyecite_citation.all_editions[0].reporter.cite_type
        return cls(
            citing_opinion=citing_opinion,
            status=cls.UNMATCHED,
            citation_string=eyecite_citation.matched_text(),
            # Store an empty string when eyecite found no court or year in
            # the surrounding text
            court_id=eyecite_citation.metadata.court or "",
            year=eyecite_citation.metadata.year or "",
            volume=eyecite_citation.groups["volume"],
            reporter=eyecite_citation.corrected_reporter(),
            page=eyecite_citation.groups["page"],
            # NOTE(review): presumably maps the reporter cite_type string to
            # the integer used by the `type` field - confirm against
            # cl.citations.utils
            type=map_reporter_db_cite_type(cite_type_str),
        )
Oops, something went wrong.