Skip to content

Commit 7551886

Browse files
pavel-karatsiuba authored and kelson42 committed
Fix scraper to new upstream Phets Web site
1 parent 40cff0b commit 7551886

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

steps/get.ts

+5 -8
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ import {log} from '../lib/logger';
1414
import {cats, rootCategories} from '../lib/const';
1515
import welcome from '../lib/welcome';
1616
import {SimulationsList} from '../lib/classes';
17-
import {barOptions, getIdAndLanguage} from '../lib/common';
17+
import {barOptions} from '../lib/common';
1818
import type {Category, LanguageDescriptor, LanguageItemPair, Meta, Simulation} from '../lib/types';
1919
import { exit } from 'yargs';
2020

@@ -187,7 +187,6 @@ const getItemCategories = (lang: string, slug: string): Category[] => {
187187

188188

189189
const fetchSims = async (): Promise<void> => {
190-
// console.log(simsTree);
191190
log.info(`Gathering sim links...`);
192191
const bar = new SingleBar(barOptions, Presets.shades_classic);
193192
bar.start(meta.count, 0);
@@ -220,23 +219,21 @@ const fetchSims = async (): Promise<void> => {
220219
fallback = true;
221220
url = `en/simulation/${(sim.name)}`;
222221
response = await got(url, {...options});
223-
status = response.statusCode;
224222
}
225223
}
226224
if (!response) throw new Error(`Got no response from ${options.prefixUrl}${url}`);
227225
const {body} = response;
228226
if (!body) throw new Error(`Got no data (status = ${status}) from ${options.prefixUrl}${url}`);
229227
const $ = cheerio.load(body);
230-
const link = $('.sim-download').attr('href');
231-
const [realId] = getIdAndLanguage(link);
228+
const realId = sim.name;
232229

233230
catalogs[lang].add({
234231
categories: getItemCategories(lang, realId),
235232
id: realId,
236233
language: lang,
237-
title: title || $('.simulation-main-title').text().trim(),
238-
topics: $('.sim-page-content ul').first().text().split('\n').map(t => t.trim()).filter(a => a),
239-
description: $('.simulation-panel-indent[itemprop]').text()
234+
title: title || $('meta[name="og:title"]').attr('content'),
235+
topics: [], // See https://github.com/openzim/phet/issues/155 for more details
236+
description: $('meta[name="description"]').attr('content')
240237
} as Simulation);
241238

242239
urlsToGet.push(`https://phet.colorado.edu/sims/html/${realId}/latest/${realId}_${lang}.html`);

0 commit comments

Comments
 (0)