Skip to content

Commit

Permalink
Add Twitter implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
thomas694 committed Jul 23, 2021
1 parent 1a4154b commit d182354
Show file tree
Hide file tree
Showing 47 changed files with 3,599 additions and 203 deletions.
4 changes: 2 additions & 2 deletions src/TumblThree/SharedAssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@

[assembly: ComVisible(false)]
[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)]
[assembly: AssemblyVersion("1.6.5.0")]
[assembly: AssemblyFileVersion("1.6.5.0")]
[assembly: AssemblyVersion("2.0.0.0")]
[assembly: AssemblyFileVersion("2.0.0.0")]
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ private async Task RunCrawlerTasksAsync(PauseToken pt, CancellationToken ct)
ICrawler crawler = _crawlerFactory.GetCrawler(blog, new Progress<DownloadProgress>(), pt, ct);
try
{
crawler.IsBlogOnlineAsync().Wait(4000);
crawler.IsBlogOnlineAsync().Wait(4000, ct);
}
catch (AggregateException ex)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,11 @@ private IReadOnlyList<IBlog> GetIBlogsCore(string directory)
{
blogs.Add(new TumblrTagSearchBlog().Load(filename));
}

if (filename.EndsWith(BlogTypes.twitter.ToString()))
{
blogs.Add(new TwitterBlog().Load(filename));
}
}
catch (SerializationException ex)
{
Expand Down Expand Up @@ -509,7 +514,7 @@ private void EnqueueAutoDownload()
}
}

private bool CanAddBlog() => _blogFactory.IsValidTumblrBlogUrl(_crawlerService.NewBlogUrl) || _blogFactory.IsValidUrl(_crawlerService.NewBlogUrl);
private bool CanAddBlog() => _blogFactory.IsValidBlogUrl(_crawlerService.NewBlogUrl) || _blogFactory.IsValidUrl(_crawlerService.NewBlogUrl);

private async Task AddBlog()
{
Expand Down Expand Up @@ -796,11 +801,11 @@ private async Task UpdateMetaInformationAsync(IBlog blog)

private async Task<IBlog> CheckIfCrawlableBlog(string blogUrl)
{
if (!_blogFactory.IsValidTumblrBlogUrl(blogUrl) && _blogFactory.IsValidUrl(blogUrl))
if (!_blogFactory.IsValidBlogUrl(blogUrl) && _blogFactory.IsValidUrl(blogUrl))
{
if ( await _tumblrBlogDetector.IsTumblrBlogWithCustomDomainAsync(blogUrl))
return TumblrBlog.Create(blogUrl, Path.Combine(_shellService.Settings.DownloadLocation, "Index"), _shellService.Settings.FilenameTemplate, true);
throw new Exception($"The url '{blogUrl}' cannot be recognized as Tumblr blog!");
throw new Exception($"The url '{blogUrl}' cannot be recognized as valid blog!");
}
return _blogFactory.GetBlog(blogUrl, Path.Combine(_shellService.Settings.DownloadLocation, "Index"), _shellService.Settings.FilenameTemplate);
}
Expand Down
16 changes: 11 additions & 5 deletions src/TumblThree/TumblThree.Applications/Crawler/AbstractCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ public abstract class AbstractCrawler
protected IShellService ShellService { get; }
protected PauseToken Pt { get; }
protected CancellationToken Ct { get; }
protected IPostQueue<TumblrPost> PostQueue { get; }
protected IPostQueue<AbstractPost> PostQueue { get; }
protected ConcurrentBag<TumblrPost> StatisticsBag { get; set; } = new ConcurrentBag<TumblrPost>();
protected List<string> Tags { get; set; } = new List<string>();

protected IDownloader Downloader;

protected AbstractCrawler(IShellService shellService, ICrawlerService crawlerService, IProgress<DownloadProgress> progress, IWebRequestFactory webRequestFactory,
ISharedCookieService cookieService, IPostQueue<TumblrPost> postQueue, IBlog blog, IDownloader downloader,
ISharedCookieService cookieService, IPostQueue<AbstractPost> postQueue, IBlog blog, IDownloader downloader,
PauseToken pt, CancellationToken ct)
{
ShellService = shellService;
Expand Down Expand Up @@ -292,13 +292,19 @@ protected void AddToDownloadList(TumblrPost addToList)

protected ulong GetLastPostId()
{
ulong lastId = Blog.LastId;
if (Blog.ForceRescan)
{
return 0;
}
return !string.IsNullOrEmpty(Blog.DownloadPages) ? 0 : Blog.LastId;
}

return !string.IsNullOrEmpty(Blog.DownloadPages) ? 0 : lastId;
/// <summary>
/// Populates <c>Tags</c> from the comma-separated <c>Blog.Tags</c> string.
/// </summary>
/// <remarks>
/// Nothing happens when <c>Blog.Tags</c> is null, empty or whitespace only, so the
/// current <c>Tags</c> list is kept. Every entry is trimmed; entries that are
/// empty after trimming are retained.
/// </remarks>
protected void GenerateTags()
{
    if (string.IsNullOrWhiteSpace(Blog.Tags))
    {
        return;
    }

    var trimmedTags = new List<string>();
    foreach (string rawTag in Blog.Tags.Split(','))
    {
        trimmedTags.Add(rawTag.Trim());
    }

    Tags = trimmedTags;
}

protected void UpdateBlogStats(bool add)
Expand Down Expand Up @@ -393,7 +399,7 @@ protected bool HandleLimitExceededWebException(WebException webException)
return false;
}

Logger.Error("{0}, {1}", string.Format(CultureInfo.CurrentCulture, Resources.LimitExceeded, Blog.Name), webException);
Logger.Error("{0}, {1}", string.Format(CultureInfo.CurrentCulture, Resources.LimitExceeded, Blog.Name), webException); //TODO: 2nd resource
ShellService.ShowError(webException, Resources.LimitExceeded, Blog.Name);
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public abstract class AbstractTumblrCrawler : AbstractCrawler
protected AbstractTumblrCrawler(IShellService shellService, ICrawlerService crawlerService, IWebRequestFactory webRequestFactory, ISharedCookieService cookieService,
ITumblrParser tumblrParser, IImgurParser imgurParser, IGfycatParser gfycatParser, IWebmshareParser webmshareParser,
IMixtapeParser mixtapeParser, IUguuParser uguuParser, ISafeMoeParser safemoeParser, ILoliSafeParser lolisafeParser,
ICatBoxParser catboxParser, IPostQueue<TumblrPost> postQueue, IBlog blog, IDownloader downloader, IProgress<DownloadProgress> progress, PauseToken pt, CancellationToken ct)
ICatBoxParser catboxParser, IPostQueue<AbstractPost> postQueue, IBlog blog, IDownloader downloader, IProgress<DownloadProgress> progress, PauseToken pt, CancellationToken ct)
: base(shellService, crawlerService, progress, webRequestFactory, cookieService, postQueue, blog, downloader, pt, ct)
{
this.TumblrParser = tumblrParser;
Expand Down Expand Up @@ -124,14 +124,6 @@ protected string ResizeTumblrImageUrl(string imageUrl)
.ToString();
}

/// <summary>
/// Splits the blog's comma-separated tag string into the <c>Tags</c> list.
/// </summary>
/// <remarks>
/// A null, empty or whitespace-only <c>Blog.Tags</c> leaves <c>Tags</c> as it is.
/// Surrounding whitespace is removed from each entry.
/// </remarks>
protected void GenerateTags()
{
    if (string.IsNullOrWhiteSpace(Blog.Tags))
    {
        return;
    }

    string[] rawTags = Blog.Tags.Split(',');
    Tags = rawTags.Select(rawTag => rawTag.Trim()).ToList();
}

protected bool CheckIfSkipGif(string imageUrl)
{
return Blog.SkipGif && imageUrl.EndsWith(".gif") || imageUrl.EndsWith(".gifv");
Expand Down
54 changes: 33 additions & 21 deletions src/TumblThree/TumblThree.Applications/Crawler/CrawlerFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@

using TumblThree.Applications.DataModels;
using TumblThree.Applications.DataModels.TumblrApiJson;
using TumblThree.Applications.DataModels.TumblrCrawlerData;
using TumblThree.Applications.DataModels.TumblrPosts;
using TumblThree.Applications.DataModels.CrawlerData;
using TumblThree.Applications.Downloader;
using TumblThree.Applications.Parser;
using TumblThree.Applications.Properties;
Expand All @@ -18,6 +17,7 @@
using TumblThree.Domain.Models;
using TumblThree.Domain.Models.Blogs;
using TumblThree.Domain.Models.Files;
using TumblThree.Applications.DataModels.Twitter.TimelineTweets;

namespace TumblThree.Applications.Crawler
{
Expand Down Expand Up @@ -59,26 +59,26 @@ public ICrawler GetCrawler(IBlog blog)

public ICrawler GetCrawler(IBlog blog, IProgress<DownloadProgress> progress, PauseToken pt, CancellationToken ct)
{
IPostQueue<TumblrPost> postQueue = GetProducerConsumerCollection();
IPostQueue<AbstractPost> postQueue = GetProducerConsumerCollection();
IFiles files = LoadFiles(blog);
IWebRequestFactory webRequestFactory = GetWebRequestFactory();
IImgurParser imgurParser = GetImgurParser(webRequestFactory, ct);
IGfycatParser gfycatParser = GetGfycatParser(webRequestFactory, ct);
switch (blog.BlogType)
{
case BlogTypes.tumblr:
IPostQueue<TumblrCrawlerData<Post>> jsonApiQueue = GetJsonQueue<Post>();
IPostQueue<CrawlerData<Post>> jsonApiQueue = GetJsonQueue<Post>();
return new TumblrBlogCrawler(shellService, crawlerService, webRequestFactory, cookieService,
GetTumblrDownloader(progress, blog, files, postQueue, pt, ct), GetTumblrJsonDownloader(jsonApiQueue, blog, pt, ct),
GetTumblrDownloader(progress, blog, files, postQueue, pt, ct), GetJsonDownloader(jsonApiQueue, blog, pt, ct),
GetTumblrApiJsonToTextParser(blog), GetTumblrParser(), imgurParser, gfycatParser, GetWebmshareParser(),
GetMixtapeParser(), GetUguuParser(), GetSafeMoeParser(), GetLoliSafeParser(), GetCatBoxParser(), postQueue,
jsonApiQueue, blog, progress, pt, ct);
case BlogTypes.tmblrpriv:
IPostQueue<TumblrCrawlerData<DataModels.TumblrSvcJson.Post>> jsonSvcQueue =
IPostQueue<CrawlerData<DataModels.TumblrSvcJson.Post>> jsonSvcQueue =
GetJsonQueue<DataModels.TumblrSvcJson.Post>();
return new TumblrHiddenCrawler(shellService, crawlerService, webRequestFactory,
cookieService, GetTumblrDownloader(progress, blog, files, postQueue, pt, ct),
GetTumblrJsonDownloader(jsonSvcQueue, blog, pt, ct), GetTumblrSvcJsonToTextParser(blog), GetTumblrParser(),
GetJsonDownloader(jsonSvcQueue, blog, pt, ct), GetTumblrSvcJsonToTextParser(blog), GetTumblrParser(),
imgurParser, gfycatParser, GetWebmshareParser(), GetMixtapeParser(), GetUguuParser(), GetSafeMoeParser(),
GetLoliSafeParser(), GetCatBoxParser(), postQueue, jsonSvcQueue, blog, progress, pt, ct);
case BlogTypes.tlb:
Expand All @@ -87,19 +87,24 @@ public ICrawler GetCrawler(IBlog blog, IProgress<DownloadProgress> progress, Pau
imgurParser, gfycatParser, GetWebmshareParser(), GetMixtapeParser(), GetUguuParser(),
GetSafeMoeParser(), GetLoliSafeParser(), GetCatBoxParser(), postQueue, blog, progress, pt, ct);
case BlogTypes.tumblrsearch:
IPostQueue<TumblrCrawlerData<DataModels.TumblrSearchJson.Datum>> jsonQueue = GetJsonQueue<DataModels.TumblrSearchJson.Datum>();
IPostQueue<CrawlerData<DataModels.TumblrSearchJson.Datum>> jsonQueue = GetJsonQueue<DataModels.TumblrSearchJson.Datum>();
return new TumblrSearchCrawler(shellService, crawlerService, webRequestFactory,
cookieService, GetTumblrDownloader(progress, blog, files, postQueue, pt, ct), GetTumblrJsonDownloader(jsonQueue, blog, pt, ct),
cookieService, GetTumblrDownloader(progress, blog, files, postQueue, pt, ct), GetJsonDownloader(jsonQueue, blog, pt, ct),
GetTumblrParser(), imgurParser, gfycatParser, GetWebmshareParser(), GetMixtapeParser(), GetUguuParser(),
GetSafeMoeParser(), GetLoliSafeParser(), GetCatBoxParser(), postQueue, jsonQueue, blog, progress, pt, ct);
case BlogTypes.tumblrtagsearch:
IPostQueue<TumblrCrawlerData<DataModels.TumblrTaggedSearchJson.Datum>> jsonTagSearchQueue =
IPostQueue<CrawlerData<DataModels.TumblrTaggedSearchJson.Datum>> jsonTagSearchQueue =
GetJsonQueue<DataModels.TumblrTaggedSearchJson.Datum>();
return new TumblrTagSearchCrawler(shellService, crawlerService, webRequestFactory,
cookieService, GetTumblrDownloader(progress, blog, files, postQueue, pt, ct),
GetTumblrJsonDownloader(jsonTagSearchQueue, blog, pt, ct), GetTumblrParser(),
GetJsonDownloader(jsonTagSearchQueue, blog, pt, ct), GetTumblrParser(),
imgurParser, gfycatParser, GetWebmshareParser(), GetMixtapeParser(), GetUguuParser(),
GetSafeMoeParser(), GetLoliSafeParser(), GetCatBoxParser(), postQueue, jsonTagSearchQueue, blog, progress, pt, ct);
case BlogTypes.twitter:
IPostQueue<CrawlerData<Tweet>> jsonTwitterQueue = GetJsonQueue<Tweet>();
return new TwitterCrawler(shellService, crawlerService, progress, webRequestFactory,
cookieService, postQueue, jsonTwitterQueue, blog, GetTwitterDownloader(progress, blog, files, postQueue, pt, ct),
GetJsonDownloader(jsonTwitterQueue, blog, pt, ct), pt, ct);
default:
throw new ArgumentException("Website is not supported!", nameof(blog));
}
Expand Down Expand Up @@ -183,28 +188,35 @@ private static IBlogService GetBlogService(IBlog blog, IFiles files)
return new BlogService(blog, files);
}

/// <summary>
/// Builds the <c>TwitterDownloader</c> that consumes <paramref name="postQueue"/> for <paramref name="blog"/>.
/// </summary>
/// <param name="progress">Progress sink passed through to the downloader.</param>
/// <param name="blog">Blog the downloader is created for.</param>
/// <param name="files">File database passed through to the downloader.</param>
/// <param name="postQueue">Queue of posts the downloader is constructed with.</param>
/// <param name="pt">Pause token passed through to the downloader.</param>
/// <param name="ct">Cancellation token, also used to create the file downloader.</param>
/// <returns>A new <c>TwitterDownloader</c> instance.</returns>
private TwitterDownloader GetTwitterDownloader(
    IProgress<DownloadProgress> progress,
    IBlog blog,
    IFiles files,
    IPostQueue<AbstractPost> postQueue,
    PauseToken pt,
    CancellationToken ct)
{
    var fileDownloader = GetFileDownloader(ct);

    // Note: this constructor takes (ct, pt) right after managerService,
    // unlike the TumblrDownloader call in this file, which passes ct last.
    return new TwitterDownloader(
        shellService,
        managerService,
        ct,
        pt,
        progress,
        postQueue,
        fileDownloader,
        crawlerService,
        blog,
        files);
}

private TumblrDownloader GetTumblrDownloader(IProgress<DownloadProgress> progress, IBlog blog, IFiles files,
IPostQueue<TumblrPost> postQueue, PauseToken pt, CancellationToken ct)
IPostQueue<AbstractPost> postQueue, PauseToken pt, CancellationToken ct)
{
return new TumblrDownloader(shellService, managerService, pt, progress, postQueue, GetFileDownloader(ct),
crawlerService, blog, files, ct);
}

private TumblrXmlDownloader GetTumblrXmlDownloader(IPostQueue<TumblrCrawlerData<XDocument>> xmlQueue, IBlog blog,
private TumblrXmlDownloader GetTumblrXmlDownloader(IPostQueue<CrawlerData<XDocument>> xmlQueue, IBlog blog,
PauseToken pt, CancellationToken ct)
{
return new TumblrXmlDownloader(shellService, pt, xmlQueue, crawlerService, blog, ct);
}

private TumblrJsonDownloader<T> GetTumblrJsonDownloader<T>(IPostQueue<TumblrCrawlerData<T>> jsonQueue, IBlog blog,
private JsonDownloader<T> GetJsonDownloader<T>(IPostQueue<CrawlerData<T>> jsonQueue, IBlog blog,
PauseToken pt, CancellationToken ct)
{
return new TumblrJsonDownloader<T>(shellService, pt, jsonQueue, crawlerService, blog, ct);
return new JsonDownloader<T>(shellService, pt, jsonQueue, crawlerService, blog, ct);
}

private IPostQueue<TumblrPost> GetProducerConsumerCollection()
private IPostQueue<AbstractPost> GetProducerConsumerCollection()
{
return new PostQueue<TumblrPost>(new ConcurrentQueue<TumblrPost>());
return new PostQueue<AbstractPost>(new ConcurrentQueue<AbstractPost>());
}

private ITumblrApiXmlToTextParser GetTumblrApiXmlToTextParser()
Expand Down Expand Up @@ -238,14 +250,14 @@ private ITumblrToTextParser<Post> GetTumblrApiJsonToTextParser(IBlog blog)
}
}

private IPostQueue<TumblrCrawlerData<XDocument>> GetApiXmlQueue()
private IPostQueue<CrawlerData<XDocument>> GetApiXmlQueue()
{
return new PostQueue<TumblrCrawlerData<XDocument>>(new ConcurrentQueue<TumblrCrawlerData<XDocument>>());
return new PostQueue<CrawlerData<XDocument>>(new ConcurrentQueue<CrawlerData<XDocument>>());
}

private IPostQueue<TumblrCrawlerData<T>> GetJsonQueue<T>()
private IPostQueue<CrawlerData<T>> GetJsonQueue<T>()
{
return new PostQueue<TumblrCrawlerData<T>>(new ConcurrentQueue<TumblrCrawlerData<T>>());
return new PostQueue<CrawlerData<T>>(new ConcurrentQueue<CrawlerData<T>>());
}
}
}
Loading

0 comments on commit d182354

Please sign in to comment.