Skip to content

Commit

Permalink
scrape game remarks
Browse files Browse the repository at this point in the history
  • Loading branch information
djbrown committed Dec 18, 2024
1 parent 7936833 commit 5bd509d
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 3 deletions.
4 changes: 2 additions & 2 deletions domain_model.puml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@
note top of Game
1 home_team
1 guest_team
? forfeiting_team
? forfeiting_team()
end note
@enduml
@enduml
3 changes: 3 additions & 0 deletions src/base/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def scrape_game(
guest_team = Team.objects.get(league=league, short_name=game_row[6].text)
home_goals, guest_goals = parsing.parse_goals(game_row)
report_number = parsing.parse_report_number(game_row[10])
remark = parsing.parse_game_remark(game_row[10])
forfeiting_team = parsing.parse_forfeiting_team(game_row[10], home_team, guest_team)

sports_hall = None
Expand All @@ -79,6 +80,7 @@ def scrape_game(
home_goals=home_goals,
guest_goals=guest_goals,
report_number=report_number,
remark=remark,
forfeiting_team=forfeiting_team,
)
LOGGER.info("CREATED Game: %s", game)
Expand All @@ -94,6 +96,7 @@ def scrape_game(
"report_number": report_number,
"opening_whistle": opening_whistle,
"sports_hall": sports_hall,
"remark": remark,
"forfeiting_team": forfeiting_team,
}
updated = ensure_defaults(game, defaults)
Expand Down
7 changes: 7 additions & 0 deletions src/base/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,13 @@ def parse_report_number(cell: _Element) -> int | None:
return None


def parse_game_remark(cell: _Element) -> str:
    """Return the remark found in a game table cell.

    The remark is the value of the first ``title`` attribute found on the
    ``<a>`` elements that are direct children of *cell*. An empty string is
    returned when no such attribute is present.
    """
    remark_titles = cell.xpath("a/@title")
    return remark_titles[0] if remark_titles else ""


def parse_forfeiting_team(cell: _Element, home_team: Team, guest_team: Team) -> Team | None:
text = str(html.tostring(cell))
if "2:0" in text:
Expand Down
18 changes: 18 additions & 0 deletions src/games/migrations/0004_game_remark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.1.3 on 2024-12-17 15:11

from django.db import migrations, models


# Schema migration for the "games" app: adds the ``remark`` field to the ``game`` model.
class Migration(migrations.Migration):

# Must be applied after migration 0003 of the "games" app.
dependencies = [
("games", "0003_alter_game_forfeiting_team_alter_game_sports_hall"),
]

# Single operation: add a TextField named "remark" that may be left blank.
operations = [
migrations.AddField(
model_name="game",
name="remark",
field=models.TextField(blank=True),
),
]
1 change: 1 addition & 0 deletions src/games/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class Game(models.Model):
home_goals = models.IntegerField(blank=True, null=True)
guest_goals = models.IntegerField(blank=True, null=True)
report_number = models.IntegerField(blank=True, null=True, unique=True)
remark = models.TextField(blank=True)
forfeiting_team = models.ForeignKey(
Team,
on_delete=models.SET_NULL,
Expand Down
5 changes: 4 additions & 1 deletion src/games/tests/game_table_single_game.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
<td>124</td>
<td>:</td>
<td>122</td>
<td><a href="https://spo.handball4all.de/misc/sboPublicReports.php?sGID=123456" target="_blank">PI</a> </td>
<td>
<a href="https://spo.handball4all.de/misc/sboPublicReports.php?sGID=123456" target="_blank">PI</a>
<a style="cursor:help" title="some remark">some..</a>
</td>
</tr>
</table>
3 changes: 3 additions & 0 deletions src/games/tests/integration/test_import_games.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def test_update_game(self):
self.assertEqual(game.home_goals, 124)
self.assertEqual(game.guest_goals, 122)
self.assertEqual(game.report_number, 123456)
self.assertEqual(game.remark, "some remark")

def test_score_relevant_change(self):
self.assert_command("import_associations", "-a", 35)
Expand Down Expand Up @@ -121,6 +122,7 @@ def test_forfeit_with_report(self):
game: Game = self.assert_objects(Game)
self.assertEqual(game.number, 24000443)
self.assertEqual(game.report_number, 2518871)
self.assertEqual(game.remark, "(0:2), gg. Heim, techn. Wertung")
self.assertEqual(game.home_goals, 0)
self.assertEqual(game.guest_goals, 0)
self.assertEqual(game.forfeiting_team, game.home_team)
Expand All @@ -135,6 +137,7 @@ def test_forfeit_without_report(self):
game: Game = self.assert_objects(Game)
self.assertEqual(game.number, 24000484)
self.assertEqual(game.report_number, None)
self.assertEqual(game.remark, "(2:0), gg. Gast")
self.assertEqual(game.home_goals, 0)
self.assertEqual(game.guest_goals, 0)
self.assertEqual(game.forfeiting_team, game.guest_team)
Expand Down
36 changes: 36 additions & 0 deletions src/games/tests/unit/test_parse_report_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,39 @@ def test_rescheduled(self):
)
expected = 123456
self.assert_from_markup(markup, expected)


class ParseGameRemarkTest(TestCase):
    """Unit tests for ``parsing.parse_game_remark`` using small ``<td>`` snippets."""

    # Anchor linking to a game report; it has no ``title`` attribute.
    _REPORT_LINK = (
        '<a href="https://spo.handball4all.de/misc/sboPublicReports.php?sGID=123456" target="_blank">PI</a> '
    )

    def assert_from_markup(self, markup, expected):
        """Parse *markup* and assert that the extracted remark equals *expected*."""
        cell = parsing.html_dom(markup)
        self.assertEqual(expected, parsing.parse_game_remark(cell))

    def test_empty(self):
        """An empty cell yields an empty remark."""
        self.assert_from_markup(markup="<td></td>", expected="")

    def test_report_only(self):
        """A cell holding only the report link yields an empty remark."""
        self.assert_from_markup(
            markup="<td>" + self._REPORT_LINK + "</td>",
            expected="",
        )

    def test_remark_only(self):
        """The full remark comes from the ``title`` attribute, not the shortened link text."""
        self.assert_from_markup(
            markup='<td><a style="cursor:help" title="(2:0), gg. Gast">(2:0..</a></td>',
            expected="(2:0), gg. Gast",
        )

    def test_report_and_remark(self):
        """With both anchors present, the titled anchor supplies the remark."""
        self.assert_from_markup(
            markup=(
                "<td>"
                + self._REPORT_LINK
                + '<a style="cursor:help" title="geä. Anspielzeit">geä..</a>'
                "</td>"
            ),
            expected="geä. Anspielzeit",
        )

0 comments on commit 5bd509d

Please sign in to comment.