From 70def36e125d688331d32a427d215453a2abeb6f Mon Sep 17 00:00:00 2001 From: Johannes Meyer zum Alten Borgloh Date: Sat, 8 Jul 2017 17:47:56 +0200 Subject: [PATCH] Improved raw size handling - Allows specifying which hosts to check for a _raw image version. The fallback now iterates through all sizes on the originally found host until a valid size is found that can be downloaded. - The Tumblr hosts can be set in the settings.json in the TumblrHosts variable. The first entry is checked first; if no _raw image is found, the next entry is checked, until the list is exhausted. Then the _1280 size of the original host is tried, then _500, _400, and so on until a request succeeds. --- src/TumblThree/SharedAssemblyInfo.cs | 4 +- .../Downloader/Downloader.cs | 4 +- .../TumblThree.Applications/FileDownloader.cs | 64 +++++++++++++++---- .../Properties/AppSettings.cs | 11 ++++ 4 files changed, 68 insertions(+), 15 deletions(-) diff --git a/src/TumblThree/SharedAssemblyInfo.cs b/src/TumblThree/SharedAssemblyInfo.cs index 9e1daea..bc53b39 100644 --- a/src/TumblThree/SharedAssemblyInfo.cs +++ b/src/TumblThree/SharedAssemblyInfo.cs @@ -12,5 +12,5 @@ [assembly: ComVisible(false)] [assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)] -[assembly: AssemblyVersion("1.0.6.8")] -[assembly: AssemblyFileVersion("1.0.6.8")] +[assembly: AssemblyVersion("1.0.6.9")] +[assembly: AssemblyFileVersion("1.0.6.9")] diff --git a/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs b/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs index d326b60..59e52d7 100644 --- a/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs +++ b/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs @@ -394,8 +394,10 @@ private async Task DownloadPhotoAsync(IProgress pro TumblrPost downloadItem, CancellationToken ct) { string blogDownloadLocation = blog.DownloadLocation(); - string fileName = FileName(downloadItem); string url = 
Url(downloadItem); + var fileDownloader = new FileDownloader(); + url = await fileDownloader.TestImageRawUrl(url, shellService.Settings); + string fileName = url.Split('/').Last(); string fileLocation = FileLocation(blogDownloadLocation, fileName); string fileLocationUrlList = FileLocationLocalized(blogDownloadLocation, Resources.FileNamePhotos); DateTime postDate = PostDate(downloadItem); diff --git a/src/TumblThree/TumblThree.Applications/FileDownloader.cs b/src/TumblThree/TumblThree.Applications/FileDownloader.cs index 8e3c0f0..41225ef 100644 --- a/src/TumblThree/TumblThree.Applications/FileDownloader.cs +++ b/src/TumblThree/TumblThree.Applications/FileDownloader.cs @@ -166,18 +166,7 @@ public async Task DownloadFileWithResumeAsync(string url, string destinati } else { - // TODO: Ugly hack: Many "_raw" requests seem to fail with '403 -- access denied' whereas usually - // the file just contained a lower resolution if it didn't had the specified size. - // We just replace the "_raw" with "_1280" and try again since "_1280" images are around for longer. - // Else, we finally give up. 
- if (url.Contains("_raw")) - { - url = url.Replace("_raw", "_1280"); - } - else - { - throw; - } + throw; } } } @@ -185,6 +174,57 @@ public async Task DownloadFileWithResumeAsync(string url, string destinati } } + public async Task TestImageRawUrl(string url, AppSettings settings) + { + if (settings.ImageSize == "raw") + { + return await TestRawUrl(url, settings); + } + return url; + } + + public async Task TestRawUrl(string url, AppSettings settings) + { + if (!url.Contains("_raw")) + return url; + string path = new Uri(url).LocalPath.TrimStart('/'); + + foreach (string host in settings.TumblrHosts) + { + string rawUrl = "https://" + host + "/" + path; + if (await UrlExists(rawUrl, settings)) + return rawUrl; + } + + foreach (string size in settings.ImageSizes) + { + string rawUrl = url.Replace(settings.ImageSize, size); + if (await UrlExists(rawUrl, settings)) + return rawUrl; + } + + return url; + } + + private async Task UrlExists(string url, AppSettings settings) + { + HttpWebRequest request = CreateWebReqeust(url, settings); + request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"; + request.Method = "HEAD"; + + try + { + using (var response = (HttpWebResponse)await request.GetResponseAsync()) + { + return response.StatusCode == HttpStatusCode.OK; + } + } + catch + { + return false; + } + } + public static async Task SaveStreamToDisk(Stream input, string destinationFileName, CancellationToken ct) { using (var stream = new FileStream(destinationFileName, FileMode.OpenOrCreate, FileAccess.Write)) diff --git a/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs b/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs index 2855fe3..19556eb 100644 --- a/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs +++ b/src/TumblThree/TumblThree.Applications/Properties/AppSettings.cs @@ -27,6 +27,12 @@ public sealed class AppSettings : IExtensibleDataObject "1080", "480" }; + private static readonly string[] 
tumblrHosts = + new string[] + { + "media.tumblr.com", "68.media.tumblr.com", "66.media.tumblr.com" + }; + public AppSettings() { Initialize(); @@ -230,6 +236,11 @@ public ObservableCollection BlogTypes get { return new ObservableCollection(blogTypes); } } + public ObservableCollection TumblrHosts + { + get { return new ObservableCollection(tumblrHosts); } + } + ExtensionDataObject IExtensibleDataObject.ExtensionData { get; set; } private void Initialize()