Skip to content

Commit

Permalink
feat: Extract screenshots from CWS and Firefox addons
Browse files Browse the repository at this point in the history
  • Loading branch information
aklinker1 committed Oct 23, 2024
1 parent 74249c1 commit 3338836
Show file tree
Hide file tree
Showing 11 changed files with 137 additions and 4 deletions.
1 change: 1 addition & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SERVER_ORIGIN=http://localhost:3000
Binary file modified bun.lockb
Binary file not shown.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
"dataloader": "^2.2.2",
"graphql": "^16.8.0",
"linkedom": "^0.15.3",
"picocolors": "^1.0.0"
"picocolors": "^1.0.0",
"radix3": "^1.1.2"
},
"devDependencies": {
"@aklinker1/check": "^1.2.0",
Expand Down
8 changes: 8 additions & 0 deletions src/apis/firefox-api.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import consola from "consola";
import { buildScreenshotUrl } from "../utils/urls";

export function createFirefoxApiClient() {
return {
Expand Down Expand Up @@ -29,6 +30,13 @@ export function createFirefoxApiClient() {
storeUrl: json.url,
version: json.current_version.version,
dailyActiveUsers: json.average_daily_users,
screenshots: (json.previews as any[]).map<Gql.Screenshot>(
(preview, i) => ({
index: i,
rawUrl: preview.image_url,
indexUrl: buildScreenshotUrl("firefox", json.id, i),
}),
),
};
},
};
Expand Down
27 changes: 26 additions & 1 deletion src/crawlers/chrome-crawler.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import consola from "consola";
import { HTMLAnchorElement, HTMLElement, parseHTML } from "linkedom";
import { buildScreenshotUrl } from "../utils/urls";

export async function crawlExtension(
id: string,
Expand All @@ -21,7 +22,7 @@ export async function crawlExtension(
const { document } = parseHTML(html);

// Uncomment to debug HTML
// Bun.write("chrome.html", document.documentElement.outerHTML);
Bun.write("chrome.html", document.documentElement.outerHTML);

// Basic metadata
const name = metaContent(document, "property=og:title")?.replace(
Expand Down Expand Up @@ -106,6 +107,23 @@ export async function crawlExtension(
// const rating = extractNumber(ratingDiv.title); // "Average rating: 4.78 stars"
// const reviewCount = extractNumber(ratingDiv.textContent); // "(1024)"

// <div
// aria-label="Item media 1 screenshot"
// data-media-url="https://lh3.googleusercontent.com/GUgh0ThX2FDPNvbaumYl4DqsUhsbYiCe-Hut9FoVEnkmTrXyA-sHbMk5jmZTj_t-dDP8rAmy6X6a6GNTCn9F8zo4VYU"
// data-is-video="false"
// data-slide-index="0"
// >
const screenshots = [...document.querySelectorAll("div[data-media-url]")]
.filter((div) => div.getAttribute("data-is-video") === "false")
.map<Gql.Screenshot>((div) => {
const index = Number(div.getAttribute("data-slide-index") || -1);
return {
index,
rawUrl: div.getAttribute("data-media-url") + "=s1280", // "s1280" gets the full resolution
indexUrl: buildScreenshotUrl("chrome", id, index),
};
});

if (name == null) return;
if (storeUrl == null) return;
if (iconUrl == null) return;
Expand All @@ -114,6 +132,12 @@ export async function crawlExtension(
if (version == null) return;
if (shortDescription == null) return;
if (longDescription == null) return;
if (
screenshots.some(
(screenshot) => screenshot.index === -1 || !screenshot.rawUrl,
)
)
return;

const result: Gql.ChromeExtension = {
id,
Expand All @@ -127,6 +151,7 @@ export async function crawlExtension(
longDescription,
rating,
reviewCount,
screenshots,
};
consola.debug("Crawl results:", result);
return result;
Expand Down
15 changes: 15 additions & 0 deletions src/rest/getChromeScreenshot.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import type { ChromeService } from "../services/chrome-service";
import { RouteHandler } from "../utils/rest-router";

/**
 * Builds the REST handler that resolves a Chrome extension screenshot by its
 * index and redirects to the screenshot's raw URL.
 *
 * @param chrome Service used to load the extension by ID.
 * @returns A route handler taking the `id` and `index` path parameters. It
 *   responds with a redirect to the screenshot's `rawUrl`, or with an empty 404
 *   when `index` is not a plain non-negative decimal integer, the extension is
 *   not found, or the extension has no screenshot at that index.
 */
export const getChromeScreenshot =
  (chrome: ChromeService): RouteHandler<{ id: string; index: string }> =>
  async (params) => {
    // `Number()` alone accepts "", "0x1", "1e0", " 1 ", etc. Only canonical
    // digits are allowed, and the check runs first so a malformed URL never
    // triggers an extension lookup.
    if (!/^\d+$/.test(params.index)) {
      return new Response(null, { status: 404 });
    }
    const index = Number(params.index);

    const extension = await chrome.getExtension(params.id);
    const screenshot = extension?.screenshots.find(
      (candidate) => candidate.index === index,
    );

    if (screenshot == null) return new Response(null, { status: 404 });
    return Response.redirect(screenshot.rawUrl);
  };
17 changes: 17 additions & 0 deletions src/schema.gql
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type ChromeExtension {
lastUpdated: String!
rating: Float
reviewCount: Int
screenshots: [Screenshot!]!
}

type FirefoxAddon {
Expand All @@ -47,4 +48,20 @@ type FirefoxAddon {
lastUpdated: String!
rating: Float
reviewCount: Int
screenshots: [Screenshot!]!
}

# A screenshot entry. Both ChromeExtension and FirefoxAddon expose a
# non-null `screenshots` list of this type.
type Screenshot {
"""
The screenshot's order.
"""
index: Int!
"""
The image's raw URL provided by the service. When screenshots are updated, this URL changes.
"""
rawUrl: String!
"""
URL to the image based on the index. If the raw URL changes, the `indexUrl` will remain constant, good for links in README.md files.
"""
indexUrl: String!
}
19 changes: 17 additions & 2 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import playgroundHtmlTemplate from "./public/playground.html";
import consola from "consola";
import { createChromeService } from "./services/chrome-service";
import { createFirefoxService } from "./services/firefox-service";
import { createRestRouter } from "./utils/rest-router";
import { getChromeScreenshot } from "./rest/getChromeScreenshot";

const playgroundHtml = playgroundHtmlTemplate.replace(
"{{VERSION}}",
Expand All @@ -22,6 +24,11 @@ export function createServer(config?: ServerConfig) {
firefox,
});

const restRouter = createRestRouter().get(
"/api/rest/chrome/:id/screenshots/:index",
getChromeScreenshot(chrome),
);

const httpServer = Bun.serve({
port,
error(request) {
Expand All @@ -32,8 +39,16 @@ export function createServer(config?: ServerConfig) {
return createResponse(undefined, { status: 204 });
}

// GraphQL
if (req.url.endsWith("/api")) {
const url = new URL(req.url, process.env.SERVER_ORIGIN);

// REST

if (url.pathname.startsWith("/api/rest")) {
return restRouter.fetch(url, req);
}

if (url.pathname.startsWith("/api")) {
// GraphQL
const data = await graphql.evaluateQuery(req);

return createResponse(JSON.stringify(data), {
Expand Down
2 changes: 2 additions & 0 deletions src/services/chrome-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@ export function createChromeService() {
},
};
}

/** Type of the service object returned by `createChromeService()`. */
export type ChromeService = ReturnType<typeof createChromeService>;
42 changes: 42 additions & 0 deletions src/utils/rest-router.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import * as radix3 from "radix3";

/**
 * Call signature of a REST endpoint implementation.
 *
 * A handler receives the path parameters, the request URL, and the request
 * itself, and produces the response either directly or as a promise.
 *
 * @typeParam TParams Shape of the path parameters object passed as the first
 *   argument.
 */
export type RouteHandler<TParams = {}> = {
  (params: TParams, url: URL, req: Request): Response | Promise<Response>;
};

/** A registered endpoint: the HTTP method it answers to and the handler to run. */
export interface Route {
/** HTTP method name compared against `req.method`, or `"ANY"` to accept every method. */
method: string;
/** Function invoked when a request matches this route. */
handler: RouteHandler;
}

// Path parameters come from the registered path pattern and are only known at
// runtime, so the router cannot check a handler's params type statically.
// Accepting `RouteHandler<any>` lets param-typed handlers be registered without
// casts under `strictFunctionTypes`.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type AnyRouteHandler = RouteHandler<any>;

/** Chainable REST router returned by {@link createRestRouter}. */
export interface RestRouter {
  /** Registers `handler` for `GET` requests on `path`. */
  get(path: string, handler: AnyRouteHandler): RestRouter;
  /** Registers `handler` for `POST` requests on `path`. */
  post(path: string, handler: AnyRouteHandler): RestRouter;
  /** Registers `handler` for every HTTP method on `path`. */
  any(path: string, handler: AnyRouteHandler): RestRouter;
  /** Registers `handler` for an arbitrary HTTP `method` on `path`. */
  on(method: string, path: string, handler: AnyRouteHandler): RestRouter;
  /** Dispatches a request to the matching handler, or responds with an empty 404. */
  fetch(url: URL, req: Request): Promise<Response>;
}

/**
 * Creates a minimal REST router backed by a radix3 path matcher.
 *
 * Routes are added with the chainable `get`, `post`, `any`, and `on` methods
 * and dispatched with `fetch`. Several methods can be registered on the same
 * path; each keeps its own handler. When both an exact-method route and an
 * `"ANY"` route exist for a path, the exact-method route wins.
 *
 * @returns The router; every registration method returns it for chaining.
 */
export function createRestRouter(): RestRouter {
  // radix3 stores a single payload per path, so inserting the same path twice
  // would replace the earlier route. Each path therefore owns one table of
  // routes keyed by HTTP method, and only that table is inserted into radix3.
  type MethodTable = { routes: Map<string, Route> };
  const r = radix3.createRouter<MethodTable>();
  const tablesByPath = new Map<string, MethodTable>();

  const router: RestRouter = {
    get(path, handler) {
      return router.on("GET", path, handler);
    },
    post(path, handler) {
      return router.on("POST", path, handler);
    },
    any(path, handler) {
      return router.on("ANY", path, handler);
    },
    on(method, path, handler) {
      // `Request.method` is upper case for standard methods; normalise so
      // `on("get", ...)` is not silently unreachable.
      const key = method.toUpperCase();

      let table = tablesByPath.get(path);
      if (table == null) {
        table = { routes: new Map() };
        tablesByPath.set(path, table);
        r.insert(path, table);
      }
      table.routes.set(key, { method: key, handler });
      return router;
    },
    async fetch(url, req) {
      const match = r.lookup(url.pathname);
      const route = match?.routes.get(req.method) ?? match?.routes.get("ANY");
      if (match == null || route == null) {
        return new Response(null, { status: 404 });
      }
      // Static paths have no params; handlers still receive an object.
      return await route.handler(match.params ?? {}, url, req);
    },
  };
  return router;
}
7 changes: 7 additions & 0 deletions src/utils/urls.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
 * Builds the index-based URL for an extension screenshot under this server's
 * `/api/rest` prefix.
 *
 * The origin is read from the `SERVER_ORIGIN` environment variable. Trailing
 * slashes on it are dropped so the result never contains `//api`. When the
 * variable is unset the result is a root-relative path rather than a string
 * starting with the literal text `undefined`.
 *
 * @param type Store the extension belongs to; used as a path segment.
 * @param id Extension ID; inserted as a path segment as-is.
 * @param index Screenshot index; used as the last path segment.
 * @returns `<origin>/api/rest/<type>/<id>/screenshots/<index>`.
 */
export function buildScreenshotUrl(
  type: "chrome" | "firefox",
  id: string,
  index: number,
): string {
  const origin = (process.env.SERVER_ORIGIN ?? "").replace(/\/+$/, "");
  return `${origin}/api/rest/${type}/${id}/screenshots/${index}`;
}

0 comments on commit 3338836

Please sign in to comment.