Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

179 add summary of changes found in scrapping #213

Merged
merged 12 commits into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion .github/workflows/scrapping.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Update NOLPL database

on:
schedule:
- cron: 0 0 * * 3
- cron: 0 0 * * *
workflow_dispatch:

jobs:
Expand Down Expand Up @@ -41,9 +41,36 @@ jobs:
sed -i -e "s@${old_line}@${new_line}@g" app.yaml

- name: Create Pull Request
id: cpr
uses: peter-evans/create-pull-request@v6
with:
base: main
commit-message: "chore(database): update database"
title: Perform scrapping to get last database version
branch: update-noplp-database

- name: Fetch branches to compare
run: |
git fetch origin update-noplp-database main --depth=1

- name: Write comment source text using Python script
run: |
{
echo 'SCRIPT_OUTPUT<<EOF'
poetry run python noplp/compare_changes.py
echo EOF
} >> "$GITHUB_ENV"

- name: Write summary comment in Pull Request
uses: actions/github-script@v7
env:
COMMENT_BODY: ${{env.SCRIPT_OUTPUT}}
PR_NUMBER: ${{steps.cpr.outputs.pull-request-number}}
with:
script: |
github.rest.issues.createComment({
issue_number: process.env.PR_NUMBER,
owner: context.repo.owner,
repo: context.repo.repo,
body: process.env.COMMENT_BODY
})
106 changes: 106 additions & 0 deletions noplp/compare_changes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import contextlib
import csv
import difflib
import io
import subprocess


def compare_diff():
    """Print an HTML summary of song changes between main and the update branch.

    Runs ``git diff`` on ``data/db_lyrics.csv`` between ``origin/main`` and
    ``origin/update-noplp-database`` and parses each changed line as one CSV
    row. Field 0 is used as the song key, field 1 is shown after "de" in the
    summary line, and field 2 is printed as the body (presumably title,
    artist and lyrics -- confirm against the CSV schema).

    Songs are grouped into three sections written to stdout:

    * updated: key present in both removed and added rows; the body is a
      unified diff of field 2 (old -> new);
    * new: key present only in added rows;
    * removed: key present only in removed rows.

    Raises:
        subprocess.CalledProcessError: if ``git diff`` exits with a non-zero
            status (for example when a ref has not been fetched).
    """
    # `git diff <old> <new>` prefixes lines that exist only in <old> with "-"
    # and lines that exist only in <new> with "+". main is the old side so
    # that "+" really means "added by the update branch".
    diff_output = subprocess.check_output(
        [
            "git",
            "diff",
            "origin/main:data/db_lyrics.csv",
            "origin/update-noplp-database:data/db_lyrics.csv",
            "-U0",
        ]
    ).decode("utf-8")

    # Drop the four header lines (diff --git / index / --- / +++).
    lines = diff_output.splitlines()[4:]

    added_lines = []
    removed_lines = []
    for line in lines:
        # Hunk headers ("@@ ...") and "\ No newline at end of file" markers
        # are not CSV rows.
        if not line.startswith(("+", "-")):
            continue
        song_details = next(csv.reader([line[1:]]), [])
        # A blank or truncated row cannot be rendered (fields 0..2 are used).
        if len(song_details) < 3:
            continue
        if line.startswith("-"):
            removed_lines.append(song_details)
        else:
            added_lines.append(song_details)

    plus_titles = {song[0] for song in added_lines}
    minus_titles = {song[0] for song in removed_lines}
    updated_titles = plus_titles & minus_titles

    new_songs = [song for song in added_lines if song[0] not in updated_titles]
    removed_songs = [
        song for song in removed_lines if song[0] not in updated_titles
    ]

    # First removed row for each key, so the lookup below is O(1).
    previous_by_title = {}
    for song in removed_lines:
        previous_by_title.setdefault(song[0], song)

    updated_songs = []
    for added_line in added_lines:
        if added_line[0] not in updated_titles:
            continue
        removed_line = previous_by_title[added_line[0]]
        # diff() yields lines lazily, and its last line may lack a trailing
        # newline; normalise and join so a printable string is stored rather
        # than a generator object.
        rendered = "\n".join(
            diff_line.rstrip("\n")
            for diff_line in diff(removed_line[2], added_line[2])
        )
        updated_songs.append((added_line[0], added_line[1], rendered))

    # Print the results
    print("Updates songs:\n")
    for song in sorted(updated_songs, key=lambda x: x[0]):
        parse_song_print(song)
    print("\n----\nNew songs:\n")
    for song in sorted(new_songs, key=lambda x: x[0]):
        parse_song_print(song, unescape_new_line=True)
    print("\n----\nRemoved songs:\n")
    for song in sorted(removed_songs, key=lambda x: x[0]):
        parse_song_print(song, unescape_new_line=True)


def parse_song_print(song, unescape_new_line=False):
    """Print one song as a collapsible HTML ``<details>`` block.

    Args:
        song: Indexable record; item 0 and item 1 go in the ``<summary>``
            line, item 2 is printed inside ``<pre><code>``.
        unescape_new_line: When true, literal backslash-n sequences in item 2
            are turned into real line breaks before printing.
    """
    print("<details>")
    print(f"<summary>{song[0]}, de {song[1]}</summary>\n<pre><code>")
    body = song[2]
    print(body.replace("\\n", "\n") if unescape_new_line else body)
    print("</code></pre>\n</details>")


def diff(old_lyrics, new_lyrics):
    """Return a unified diff between two lyrics values.

    Each argument is converted with ``str()``, stripped of surrounding
    whitespace, and has its literal backslash-n sequences replaced by real
    line breaks before being split into lines.

    Args:
        old_lyrics: Previous version of the lyrics.
        new_lyrics: New version of the lyrics.

    Returns:
        The iterator produced by ``difflib.unified_diff``. Lines keep their
        line endings; because the text is stripped first, the final line of
        either side has none.
    """

    def _as_lines(lyrics):
        # str() gives the same text print() would have written, without
        # temporarily replacing the process-wide sys.stdout.
        text = str(lyrics).strip().replace("\\n", "\n")
        return text.splitlines(keepends=True)

    expected = _as_lines(old_lyrics)
    actual = _as_lines(new_lyrics)

    return difflib.unified_diff(expected, actual)


# Script entry point: run the comparison and print the summary to stdout.
# NOTE: this call sits at module level without a `__main__` guard, so it also
# runs whenever this file is imported.
compare_diff()
Loading