From 169a9ca6a63a8d271b90547c86aa8c0284b05db8 Mon Sep 17 00:00:00 2001 From: emschu Date: Fri, 27 Dec 2024 13:01:15 +0100 Subject: [PATCH 1/4] Fix incomplete ARD image link checking Signed-off-by: emschu --- ard.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ard.go b/ard.go index 81de92a..5c15127 100644 --- a/ard.go +++ b/ard.go @@ -37,9 +37,10 @@ const ( ) var ( - ardProgramURLMatcher = regexp.MustCompile(`^https:\/\/www\.ardmediathek\.de\/video\/.+`) - ardProgramPageMatcher = regexp.MustCompile(`^https:\/\/programm\-api\.ard\.de\/program\/api\/teaser\?teaserId=.+`) - ardImageLinkUrlMatcher = regexp.MustCompile(`^https:\/\/api\.ardmediathek\.de\/image-service\/image.+`) + ardProgramURLMatcher = regexp.MustCompile(`^https:\/\/www\.ardmediathek\.de\/video\/.+`) + ardProgramPageMatcher = regexp.MustCompile(`^https:\/\/programm\-api\.ard\.de\/program\/api\/teaser\?teaserId=.+`) + ardImageLinkUrlMatcher = regexp.MustCompile(`^https:\/\/api\.ardmediathek\.de\/image-service\/image.+`) + ardImageLinkUrlMatcher2 = regexp.MustCompile(`^https:\/\/programm-api.ard.de\/images\/.+`) ) // ARDParser struct of ard parser code @@ -144,7 +145,7 @@ func (a *ARDParser) handleDay(channel Channel, day time.Time) { if len(programEntry.ImageLinks) > 0 { for _, img := range programEntry.ImageLinks { - if !ardImageLinkUrlMatcher.MatchString(img.URL) { + if !ardImageLinkUrlMatcher.MatchString(img.URL) && !ardImageLinkUrlMatcher2.MatchString(img.URL) { appLog(fmt.Sprintf("Found invalid image link '%s' for program entry with hash '%s'. 
Skipping.", img.URL, programEntry.Hash)) atomic.AddUint64(&status.TotalSkippedPE, 1) continue From 357092718151d9c5b55730ebda7611a6ee86e547 Mon Sep 17 00:00:00 2001 From: worker Date: Fri, 27 Dec 2024 13:25:44 +0100 Subject: [PATCH 2/4] update changelog --- Changelog.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Changelog.md b/Changelog.md index b2ebdd4..60f728d 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,8 @@ # Changelog oerc +## 0.17.1 - 2024/12/27 +- Fix problem validating ARD image URLs + ## 0.17.0 - 2024/12/26 - Golang 1.23, toolchain and dependency updates - Changed fetching ARD program information using new ARD program and tv show API From 3ecf6b3452c9a1a3fd3cee8f033928529529b66f Mon Sep 17 00:00:00 2001 From: worker Date: Fri, 27 Dec 2024 13:30:15 +0100 Subject: [PATCH 3/4] remove code smells --- ard.go | 70 +++++++++++++++++++++++++++---------------------------- server.go | 6 ++++- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/ard.go b/ard.go index 5c15127..51e5bf5 100644 --- a/ard.go +++ b/ard.go @@ -37,10 +37,39 @@ const ( ) var ( - ardProgramURLMatcher = regexp.MustCompile(`^https:\/\/www\.ardmediathek\.de\/video\/.+`) - ardProgramPageMatcher = regexp.MustCompile(`^https:\/\/programm\-api\.ard\.de\/program\/api\/teaser\?teaserId=.+`) - ardImageLinkUrlMatcher = regexp.MustCompile(`^https:\/\/api\.ardmediathek\.de\/image-service\/image.+`) - ardImageLinkUrlMatcher2 = regexp.MustCompile(`^https:\/\/programm-api.ard.de\/images\/.+`) + ardProgramURLMatcher = regexp.MustCompile(`^https:\/\/www\.ardmediathek\.de\/video\/.+`) + ardProgramPageMatcher = regexp.MustCompile(`^https:\/\/programm\-api\.ard\.de\/program\/api\/teaser\?teaserId=.+`) + ardImageLinkMatcher = regexp.MustCompile(`^https:\/\/api\.ardmediathek\.de\/image-service\/image.+`) + ardImageLinkMatcher2 = regexp.MustCompile(`^https:\/\/programm-api.ard.de\/images\/.+`) + ardTvShowCategories = []string{ + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + 
"i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "#", + } ) // ARDParser struct of ard parser code @@ -145,7 +174,7 @@ func (a *ARDParser) handleDay(channel Channel, day time.Time) { if len(programEntry.ImageLinks) > 0 { for _, img := range programEntry.ImageLinks { - if !ardImageLinkUrlMatcher.MatchString(img.URL) && !ardImageLinkUrlMatcher2.MatchString(img.URL) { + if !ardImageLinkMatcher.MatchString(img.URL) && !ardImageLinkMatcher2.MatchString(img.URL) { appLog(fmt.Sprintf("Found invalid image link '%s' for program entry with hash '%s'. Skipping.", img.URL, programEntry.Hash)) atomic.AddUint64(&status.TotalSkippedPE, 1) continue @@ -237,37 +266,8 @@ func (a *ARDParser) fetchTVShows() { } // build set of urls to fetch tv shows from - var tvShowCategories = []string{ - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "#", - } var tvShowApiURLs = make([]string, 0) - for _, category := range tvShowCategories { + for _, category := range ardTvShowCategories { categoryString := strings.TrimSuffix(base64.StdEncoding.EncodeToString([]byte("ARD."+category)), "=") tvShowApiURLs = append(tvShowApiURLs, fmt.Sprintf("%s%s", ardMediaThekApiTvShowPath, categoryString)) } diff --git a/server.go b/server.go index 81bc717..0d6b952 100644 --- a/server.go +++ b/server.go @@ -143,7 +143,11 @@ func initRouter() *gin.Engine { } } else { // trust no proxies - except the user specified one - r.SetTrustedProxies(nil) + err := r.SetTrustedProxies(nil) + if err != nil { + log.Fatal(fmt.Sprint("Problem to trust nil proxy url")) + return nil + } } r.Use(gin.Recovery()) From d453e7ed12aff1191dfd7087e2f12762622cfd73 Mon Sep 17 00:00:00 2001 From: worker Date: Fri, 27 Dec 2024 13:34:22 +0100 Subject: [PATCH 4/4] update readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/README.md b/README.md index 06f69d5..91e761f 100644 --- a/README.md +++ b/README.md @@ -223,12 +223,12 @@ to be available too, so ensure to enable the postgres database service as well. ### Channel list -**Channels:** ARD, ZDF, 3Sat, ARTE, ZDFInfo, ZDFNeo, Phoenix, KiKa, ARD One, Tagesschau24, ARD Alpha, SWR RP Fernsehen,WDR Fernsehen, SWR BW Fernsehen,SR Fernsehen, Radio Bremen TV, RBB Fernsehen, NDR Fernsehen, MDR Fernsehen, HR Fernsehen, BR Fernsehen, ORF eins, ORF 2, ORF III, ORF Sport +, SRF-1, SRF-zwei, SRF-info. +**Channels:** ARD, ZDF, 3Sat, ARTE, ZDFInfo, ZDFNeo, Phoenix, KiKa, ARD One, Tagesschau24, ARD Alpha, SWR, WDR, SR, Radio Bremen TV, RBB, NDR, MDR, HR, BR, ORF eins, ORF 2, ORF III, ORF Sport +, SRF-1, SRF-zwei, SRF-info. ### Time range import limits | Channel family | Earliest date | Latest date | -|----------------|-----------------| --------------- | -| ARD | ~ 2010/01 | Today + 6 weeks | +|----------------|-----------------|-----------------| +| ARD | Today - 8 days | Today + 8 days | | ZDF | 2015/03 | Today + 6 weeks | | ORF | Today - 14 days | Today + 22 days | | SRF | Today - 14 days | Today + 29 days |