Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ABC News Australia #3

Merged
merged 1 commit into from
Feb 15, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions ovos_ocp_news_plugin/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import feedparser
import pytz
import re
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from datetime import timedelta
from lingua_franca.time import now_local
from ovos_ocp_rss_plugin import OCPRSSFeedExtractor
Expand All @@ -16,6 +18,7 @@ class OCPNewsExtractor(OCPStreamExtractor):
GPB_URL = "http://feeds.feedburner.com/gpbnews"
GR1_URL = "https://www.raiplaysound.it"
FT_URL = "https://www.ft.com"
ABC_URL = "https://www.abc.net.au/news"

def __init__(self, ocp_settings=None):
super().__init__(ocp_settings)
Expand All @@ -37,7 +40,7 @@ def validate_uri(self, uri):
return any([uri.startswith(sei) for sei in self.supported_seis]) or \
any([uri.startswith(url) for url in [
self.TSF_URL, self.GBP_URL, self.NPR_URL,
self.GR1_URL, self.FT_URL
self.GR1_URL, self.FT_URL, self.ABC_URL
]])

def extract_stream(self, uri, video=True):
Expand All @@ -54,6 +57,8 @@ def extract_stream(self, uri, video=True):
return self.gr1()
elif uri.startswith(self.FT_URL):
return self.ft()
elif uri.startswith(self.ABC_URL):
return self.abc()

@classmethod
def tsf(cls):
Expand Down Expand Up @@ -136,15 +141,41 @@ def ft(cls):
uri = mp3_soup.find('source')['src']
return {"uri": uri, "title": "FT news briefing", "author": "Financial Times"}

@classmethod
def abc(cls):
    """Custom news fetcher for the ABC News Australia hourly briefing.

    The briefing URL is derived from the current Sydney wall-clock time.
    If the file for the current hour does not answer with HTTP 200 (or
    the availability probe fails), the URL for the previous hour is
    returned instead, without being verified.

    Returns:
        dict: ``{"uri": ..., "title": ..., "author": ...}`` describing
        the stream.
    """
    # Format template with (year, month, hour, day, month)
    url_temp = ('https://abcmedia.akamaized.net/news/audio/news-briefings/'
                'top-stories/{}{}/NAUs_{}00flash_{}{}_nola.mp3')
    syd_tz = pytz.timezone('Australia/Sydney')
    now = datetime.now(pytz.utc)

    def briefing_url(moment):
        """Build the briefing URL for an aware datetime, in Sydney time."""
        local = moment.astimezone(syd_tz)
        return url_temp.format(local.strftime('%Y'), local.strftime('%m'),
                               local.strftime('%H'), local.strftime('%d'),
                               local.strftime('%m'))

    url = briefing_url(now)

    # Probe availability without downloading the audio: stream=True defers
    # the body, and the timeout stops a stalled server from hanging us.
    try:
        with requests.get(url, stream=True, timeout=10) as response:
            available = response.status_code == 200
    except requests.RequestException:
        available = False

    # If this hour's news is unavailable, try the hour before. Subtracting
    # in UTC and converting again keeps the hour zero-padded and rolls the
    # day/month/year back correctly at midnight.
    if not available:
        url = briefing_url(now - timedelta(hours=1))

    return {"uri": url,
            "title": "ABC News Australia",
            "author": "Australian Broadcasting Corporation"}


if __name__ == "__main__":
# dedicated parsers
print(OCPNewsExtractor.ft())
print(OCPNewsExtractor.abc())
exit()
print(OCPNewsExtractor.npr())
print(OCPNewsExtractor.tsf())
print(OCPNewsExtractor.gr1())
print(OCPNewsExtractor.gpb())
print(OCPNewsExtractor.ft())
# RSS
print(OCPRSSFeedExtractor.get_rss_first_stream("rss//https://www.cbc.ca/podcasting/includes/hourlynews.xml"))
print(OCPRSSFeedExtractor.get_rss_first_stream("rss//https://podcasts.files.bbci.co.uk/p02nq0gn.rss"))
Expand Down