From 8b69cddfb92b3b7548d3f90dbec1038c728ea5d9 Mon Sep 17 00:00:00 2001 From: kamtschatka Date: Mon, 30 Sep 2024 02:02:48 +0200 Subject: [PATCH] feature(web): Add ability to manually trigger full page archives. Fixes #398 (#418) * [Feature Request] Ability to select what to "crawl full page archive" #398 Added the ability to start a full page crawl for links and also in bulk operations added the ability to refresh links as a bulk operation as well * minor icon and wording changes --------- Co-authored-by: MohamedBassem --- .../dashboard/BulkBookmarksAction.tsx | 51 ++++++++++++++++++- .../dashboard/bookmarks/BookmarkOptions.tsx | 25 +++++++++ apps/workers/crawlerWorker.ts | 8 +-- packages/shared/queues.ts | 3 +- packages/trpc/routers/bookmarks.ts | 8 ++- 5 files changed, 89 insertions(+), 6 deletions(-) diff --git a/apps/web/components/dashboard/BulkBookmarksAction.tsx b/apps/web/components/dashboard/BulkBookmarksAction.tsx index 0867fdd2..39e13940 100644 --- a/apps/web/components/dashboard/BulkBookmarksAction.tsx +++ b/apps/web/components/dashboard/BulkBookmarksAction.tsx @@ -8,10 +8,21 @@ import { import ActionConfirmingDialog from "@/components/ui/action-confirming-dialog"; import { useToast } from "@/components/ui/use-toast"; import useBulkActionsStore from "@/lib/bulkActions"; -import { CheckCheck, Hash, Link, List, Pencil, Trash2, X } from "lucide-react"; +import { + CheckCheck, + FileDown, + Hash, + Link, + List, + Pencil, + RotateCw, + Trash2, + X, +} from "lucide-react"; import { useDeleteBookmark, + useRecrawlBookmark, useUpdateBookmark, } from "@hoarder/shared-react/hooks/bookmarks"; import { BookmarkTypes } from "@hoarder/shared/types/bookmarks"; @@ -63,11 +74,35 @@ export default function BulkBookmarksAction() { onError, }); + const recrawlBookmarkMutator = useRecrawlBookmark({ + onSuccess: () => { + setIsBulkEditEnabled(false); + }, + onError, + }); + interface UpdateBookmarkProps { favourited?: boolean; archived?: boolean; } + const recrawlBookmarks = 
async (archiveFullPage: boolean) => { + const links = selectedBookmarks.filter( + (item) => item.content.type === BookmarkTypes.LINK, + ); + await Promise.all( + links.map((item) => + recrawlBookmarkMutator.mutateAsync({ + bookmarkId: item.id, + archiveFullPage, + }), + ), + ); + toast({ + description: `${links.length} bookmarks will be ${archiveFullPage ? "re-crawled and archived!" : "refreshed!"}`, + }); + }; + function isClipboardAvailable() { if (typeof window === "undefined") { return false; @@ -172,6 +207,20 @@ export default function BulkBookmarksAction() { isPending: updateBookmarkMutator.isPending, hidden: !isBulkEditEnabled, }, + { + name: "Download Full Page Archive", + icon: , + action: () => recrawlBookmarks(true), + isPending: recrawlBookmarkMutator.isPending, + hidden: !isBulkEditEnabled, + }, + { + name: "Refresh", + icon: , + action: () => recrawlBookmarks(false), + isPending: recrawlBookmarkMutator.isPending, + hidden: !isBulkEditEnabled, + }, { name: "Delete", icon: , diff --git a/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx b/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx index 4007090e..c09d2e50 100644 --- a/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx +++ b/apps/web/components/dashboard/bookmarks/BookmarkOptions.tsx @@ -11,6 +11,7 @@ import { import { useToast } from "@/components/ui/use-toast"; import { useClientConfig } from "@/lib/clientConfig"; import { + FileDown, Link, List, ListX, @@ -88,6 +89,15 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { onError, }); + const fullPageArchiveBookmarkMutator = useRecrawlBookmark({ + onSuccess: () => { + toast({ + description: "Full Page Archive creation has been triggered", + }); + }, + onError, + }); + const removeFromListMutator = useRemoveBookmarkFromList({ onSuccess: () => { toast({ @@ -152,6 +162,21 @@ export default function BookmarkOptions({ bookmark }: { bookmark: ZBookmark }) { /> {bookmark.archived ? 
"Un-archive" : "Archive"} + + {bookmark.content.type === BookmarkTypes.LINK && ( + { + fullPageArchiveBookmarkMutator.mutate({ + bookmarkId: bookmark.id, + archiveFullPage: true, + }); + }} + > + + Download Full Page Archive + + )} + {bookmark.content.type === BookmarkTypes.LINK && ( { diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 49e9495d..246e9050 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -214,7 +214,7 @@ async function getBookmarkDetails(bookmarkId: string) { }); if (!bookmark || !bookmark.link) { - throw new Error("The bookmark either doesn't exist or not a link"); + throw new Error("The bookmark either doesn't exist or is not a link"); } return { url: bookmark.link.url, @@ -519,6 +519,7 @@ async function crawlAndParseUrl( oldScreenshotAssetId: string | undefined, oldImageAssetId: string | undefined, oldFullPageArchiveAssetId: string | undefined, + archiveFullPage: boolean, ) { const { htmlContent, @@ -578,7 +579,7 @@ async function crawlAndParseUrl( ]); return async () => { - if (serverConfig.crawler.fullPageArchive) { + if (serverConfig.crawler.fullPageArchive || archiveFullPage) { const fullPageArchiveAssetId = await archiveWebpage( htmlContent, browserUrl, @@ -615,7 +616,7 @@ async function runCrawler(job: DequeuedJob) { return; } - const { bookmarkId } = request.data; + const { bookmarkId, archiveFullPage } = request.data; const { url, userId, @@ -654,6 +655,7 @@ async function runCrawler(job: DequeuedJob) { oldScreenshotAssetId, oldImageAssetId, oldFullPageArchiveAssetId, + archiveFullPage, ); } diff --git a/packages/shared/queues.ts b/packages/shared/queues.ts index 756f095d..cadeefd0 100644 --- a/packages/shared/queues.ts +++ b/packages/shared/queues.ts @@ -17,8 +17,9 @@ export function runQueueDBMigrations() { export const zCrawlLinkRequestSchema = z.object({ bookmarkId: z.string(), runInference: z.boolean().optional(), + archiveFullPage: z.boolean().optional().default(false), 
}); -export type ZCrawlLinkRequest = z.infer<typeof zCrawlLinkRequestSchema>; +export type ZCrawlLinkRequest = z.input<typeof zCrawlLinkRequestSchema>; export const LinkCrawlerQueue = new SqliteQueue( "link_crawler_queue", diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index 9990c6ed..9b23c88e 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -426,11 +426,17 @@ export const bookmarksAppRouter = router({ } }), recrawlBookmark: authedProcedure - .input(z.object({ bookmarkId: z.string() })) + .input( + z.object({ + bookmarkId: z.string(), + archiveFullPage: z.boolean().optional().default(false), + }), + ) .use(ensureBookmarkOwnership) .mutation(async ({ input }) => { await LinkCrawlerQueue.enqueue({ bookmarkId: input.bookmarkId, + archiveFullPage: input.archiveFullPage, }); }), getBookmark: authedProcedure