-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathfind-specs.js
411 lines (358 loc) · 18.5 KB
/
find-specs.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
#!/usr/bin/env node
'use strict';
import { execFileSync, execSync } from "node:child_process";
import fs from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { Command } from "commander";
import puppeteer from 'puppeteer';
import packageContents from "../package.json" with { type: "json" };
import specs from "../index.json" with { type: "json" };
import computeShortname from "./compute-shortname.js";
import ignorable from "./data/ignore.json" with { type: "json" };
import monitorList from "./data/monitor.json" with { type: "json" };
const { version } = packageContents;
const scriptPath = path.dirname(fileURLToPath(import.meta.url));
const execParams = { cwd: path.join(scriptPath, '..') };
// Repositories and spec URLs listed in monitor.json: candidates whose
// repository name or spec URL is a key of these objects are skipped by
// `isRelevantRepo` and `isInScope`.
const {repos: temporarilyIgnorableRepos, specs: temporarilyIgnorableSpecs} = monitorList;
// Names of the groups listed in ignore.json: working groups whose name appears
// here are left out by `findSpecs`.
const nonBrowserSpecWgs = Object.keys(ignorable.groups);
// Names of the only community groups whose repositories `findSpecs` looks at.
const watchedBrowserCgs = [
"Web Platform Incubator Community Group",
"Web Assembly Community Group",
"Immersive Web Community Group",
"Audio Community Group",
"Privacy Community Group",
"GPU for the Web Community Group"
];
// Top-level directories of w3c/csswg-drafts that `findSpecs` does not turn
// into candidate spec URLs.
const cssMetaDir = ["shared", "indexes", "bin", ".github", "css-module", "css-module-bikeshed"];
// Directory names under `specs` in w3c/svgwg that `findSpecs` does not turn
// into candidate spec URLs.
const svgMetaDir = ["template"];
// Top-level directories of w3c/fxtf-drafts that `findSpecs` does not turn into
// candidate spec URLs.
const fxtfMetaDir = [".github", "shared"];
// Top-level directories of w3c/css-houdini-drafts that `findSpecs` does not
// turn into candidate spec URLs.
const houdiniMetaDir = [".github", "images"];
/**
 * Turn the homepage URL of a repository into the canonical URL of its spec.
 *
 * The URL is forced to `https:`, a trailing `/explainer` path segment is
 * dropped, a single-segment path gets a trailing slash and any fragment is
 * removed.
 *
 * @param {object} r Repository description. The function reads its
 *   `homepageUrl`, `owner.login` and `name` properties.
 * @returns {{repo: string, spec: string}} The `owner/name` identifier of the
 *   repository and the canonical spec URL.
 * @throws {TypeError} If the homepage URL cannot be parsed, even after
 *   prepending `https://` to it.
 */
function canonicalizeGhUrl(r) {
  let url;
  try {
    url = new URL(r.homepageUrl);
  }
  catch (err) {
    if (typeof r.homepageUrl !== 'string') {
      throw err;
    }
    // The homepage may have been entered without a scheme (for instance
    // "wicg.github.io/foo"), which the URL parser rejects. Give it another
    // chance as an https URL rather than failing right away.
    url = new URL(`https://${r.homepageUrl}`);
  }
  url.protocol = 'https:';

  // Exceptionally, the homepage URL may link to the explainer instead of to the
  // spec. One example at the time of writing is Storage Buckets:
  // https://github.com/WICG/storage-buckets which targets:
  // https://wicg.github.io/storage-buckets/explainer
  url.pathname = url.pathname.replace(/(\/explainer(\.[^\/]+|\/)?)$/, '/');

  // A path made of a single segment without trailing slash ("/foo") gets one
  if (url.pathname.lastIndexOf('/') === 0 && url.pathname.length > 1) {
    url.pathname += '/';
  }

  // Exceptionally, the homepage URL may link to a fragment within a spec. One
  // example at the time of writing is Close Watcher to redirect to the HTML
  // spec: https://github.com/WICG/close-watcher
  // The fragment is reset unconditionally: `url.hash` is also empty for a URL
  // that ends with a bare "#", and that "#" must go away as well.
  url.hash = '';

  return {repo: `${r.owner.login}/${r.name}`, spec: url.toString()};
}
/**
 * Return the given URL, serialized with the `https:` scheme.
 *
 * @param {string|URL} url URL to canonicalize.
 * @returns {string} The serialized URL.
 * @throws {TypeError} If the URL cannot be parsed.
 */
function canonicalizeTRUrl(url) {
  const parsed = new URL(url);
  parsed.protocol = 'https:';
  return parsed.href;
}
/**
 * Remove one trailing slash from the given string, if there is one.
 *
 * @param {string} url String to trim.
 * @returns {string} The string without its final `/`.
 */
const trimSlash = url => {
  if (!url.endsWith('/')) {
    return url;
  }
  return url.substring(0, url.length - 1);
};
/**
 * Compute the default GitHub Pages URL of a repository.
 *
 * @param {object} repo Repository description with `owner.login` and `name`.
 * @returns {{repo: string, spec: string}} The `owner/name` identifier of the
 *   repository and the `https://<owner>.github.io/<name>/` URL, where the owner
 *   is lowercased.
 */
const toGhUrl = repo => {
  const login = repo.owner.login;
  const name = repo.name;
  return {
    repo: `${login}/${name}`,
    spec: `https://${login.toLowerCase()}.github.io/${name}/`
  };
};
/**
 * Create a predicate that tells whether a repository description matches the
 * given `owner/name` identifier.
 *
 * @param {string} fullName Repository identifier to look for.
 * @returns {function(object): boolean} Predicate over repository descriptions
 *   (with `owner.login` and `name` properties).
 */
const matchRepoName = fullName => {
  return r => `${r.owner.login}/${r.name}` === fullName;
};
/**
 * Tell whether a repository is neither ignored (ignore.json) nor temporarily
 * ignored (monitor.json).
 *
 * @param {string} fullName Repository identifier (`owner/name`).
 * @returns {boolean} `false` when the identifier is a key of either list.
 */
const isRelevantRepo = fullName => {
  const isIgnored = Object.keys(ignorable.repos).includes(fullName);
  if (isIgnored) {
    return false;
  }
  const isMonitored = Object.keys(temporarilyIgnorableRepos).includes(fullName);
  return !isMonitored;
};
/**
 * Tell whether a candidate is worth reporting: its spec URL must not be a key
 * of the ignored or temporarily ignored specs, and its repository must be
 * relevant (see `isRelevantRepo`).
 *
 * @param {{spec: string, repo: string}} candidate Candidate to check.
 * @returns {boolean} `true` when the candidate is in scope.
 */
const isInScope = ({spec: url, repo: fullName}) => {
  if (Object.keys(ignorable.specs).includes(url)) {
    return false;
  }
  if (Object.keys(temporarilyIgnorableSpecs).includes(url)) {
    return false;
  }
  return isRelevantRepo(fullName);
};
// Tell whether spec entry `s` belongs to the same series as the spec at `url`
// and is at a more recent level.
// - `s`: spec entry, read through `s.series.shortname` and `s.seriesVersion`.
// - `url`: URL handed to `computeShortname` to get the series shortname and
//   the series version to compare with.
// - `loose`: set loose parameter when checking loosely if another version
//   exists. A same-series entry then also matches when it has the same
//   version, no version at all, or version 1 while `url` has no version.
// Returns a truthy value on match and a falsy one otherwise. The result is
// not necessarily a boolean since the `&&`/`||` chain is returned as is.
// Any exception raised along the way, e.g. by `computeShortname`, yields
// `false`.
// NOTE(review): `>` compares the versions as they are; if they are strings,
// the comparison is lexicographic ("10" > "9" is false) - confirm that
// versions cannot reach that point.
const hasMoreRecentLevel = (s, url, loose) => {
try {
const shortnameData = computeShortname(url);
return s.series.shortname === shortnameData.series.shortname
&& (s.seriesVersion > (shortnameData.seriesVersion ?? '')
|| loose && (s.seriesVersion === shortnameData.seriesVersion
// case of CSS drafts whose known editors drafts are version-less, but the directories in the repo use versions
|| !s.seriesVersion
// Case of houdini drafts whose known editors drafts are versioned, but the directories in the repo use version-less
|| (!shortnameData.seriesVersion && s.seriesVersion == 1)
));
} catch (e) {
// Deliberate best-effort: when the shortname cannot be computed or the entry
// lacks the expected properties, consider that there is no match.
return false;
}
};
/**
 * Tell whether the spec URL of a candidate is absent from the list of specs.
 *
 * A URL is considered tracked when a spec in the list has a nightly URL that
 * starts with it, a release URL or a nightly page equal to it, or when a spec
 * of the same series has a more recent level (see `hasMoreRecentLevel`).
 *
 * @param {{spec: string}} candidate Candidate to check.
 * @returns {boolean} `true` when no spec in the list covers the URL.
 */
const hasUntrackedURL = ({spec: url}) => {
  // Compare URLs case-insensitively as we sometimes end up with different
  // casing (and difference is usually not significant)
  const needle = trimSlash(url.toLowerCase());
  const coversUrl = s =>
    s.nightly?.url?.toLowerCase()?.startsWith(needle) ||
    (s.release && trimSlash(s.release.url.toLowerCase()) === needle) ||
    (s.nightly?.pages && s.nightly.pages.find(u => trimSlash(u.toLowerCase()) === needle));
  if (specs.some(coversUrl)) {
    return false;
  }

  // Because CSS specs have editors draft with and without levels, we look
  // loosely for more recent levels when checking with editors draft
  const loose = url.match(/\/drafts\./) && !url.match(/\/w3\.org/);
  return !specs.some(s => hasMoreRecentLevel(s, url, loose));
};
/**
 * Tell whether the release URL of a candidate is absent from the list of
 * specs: no spec has that release URL (trailing slash aside) and no spec of the
 * same series has a more recent level.
 *
 * @param {{spec: string}} candidate Candidate to check.
 * @returns {boolean} `true` when no spec in the list covers the URL.
 */
const hasUnknownTrSpec = ({spec: url}) => {
  const isReleased = specs.some(s => s.release && trimSlash(s.release.url) === trimSlash(url));
  if (isReleased) {
    return false;
  }
  return !specs.some(s => hasMoreRecentLevel(s, url));
};
/**
 * Combine two predicates into one that matches when either of them does.
 *
 * @param {function} f1 First predicate, always evaluated.
 * @param {function} f2 Second predicate, evaluated only when the first one
 *   returns a falsy value.
 * @returns {function} Function that returns the result of `f1` when truthy,
 *   the result of `f2` otherwise.
 */
const eitherFilter = (f1, f2) => {
  return value => {
    const first = f1(value);
    return first || f2(value);
  };
};
/**
 * Create a predicate that tells whether the `repo-type` recorded in the `w3c`
 * property of a repository description equals or includes the given type.
 *
 * @param {string} type Repository type to look for.
 * @returns {function(object): *} Predicate over repository descriptions. It
 *   returns the falsy `w3c` or `repo-type` value as is when one is missing, a
 *   boolean otherwise.
 */
const hasRepoType = type => r => {
  const repoType = r.w3c && r.w3c["repo-type"];
  return repoType && (repoType === type || repoType.includes(type));
};
/**
 * Fetch the spec URL of a candidate to check that there is content there.
 *
 * @param {{spec: string}} candidate Candidate to check.
 * @returns {Promise<object|undefined>} A copy of the candidate whose `spec` is
 *   the URL reported by the response when the response is OK, `undefined`
 *   otherwise.
 */
const hasPublishedContent = (candidate) => {
  return fetch(candidate.spec).then(response => {
    if (!response.ok) {
      return undefined;
    }
    return {...candidate, spec: response.url};
  });
};
/**
 * Look for specs that are not yet in the list and that may be worth adding.
 *
 * Candidates come from the repositories of W3C working groups and of the
 * watched community groups, from W3C rec-track publications, WHATWG
 * workstreams, the CSS, SVG, FXTF and Houdini draft repositories, TC39 stage 3
 * proposals and WebAssembly phase 3+ proposals. Candidates whose URL is already
 * tracked, ignored or monitored are dropped, and so are candidates whose URL
 * cannot be fetched.
 *
 * @returns {Promise<{additions: object[], monitor: object[]}>} `additions`
 *   lists candidate specs (`repo`, `spec`, `impl.chrome` and, when it can be
 *   computed, `shortname`), sorted by shortname first, then by URL for those
 *   without a shortname. `monitor` lists `cg-report` repositories (`repo` only)
 *   that have no homepage URL and no published content at their default GitHub
 *   Pages URL.
 * @throws {Error} If one of the JSON sources cannot be retrieved.
 */
async function findSpecs() {
  // Fetch a JSON resource. HTTP errors are reported explicitly: an error
  // payload would otherwise be processed as if it were the expected data and
  // trigger an obscure failure further down.
  const fetchJson = async url => {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Could not fetch ${url} (HTTP status ${response.status})`);
    }
    return response.json();
  };

  // Only keep candidates that are not in the list yet and that are in scope
  const keepUntracked = list => list.filter(hasUntrackedURL).filter(isInScope);

  const {groups, repos} = await fetchJson("https://w3c.github.io/validate-repos/report.json");
  const specRepos = await fetchJson("https://w3c.github.io/spec-dashboard/repo-map.json");
  const whatwgSpecs = (await fetchJson("https://raw.githubusercontent.com/whatwg/sg/master/db.json"))
    .workstreams
    .map(w => w.standards.map(s => ({...s, id: s.href.replace(/.*\/([a-z]+)\.spec\.whatwg\.org\//, '$1')})))
    .flat();
  const cssSpecs = (await fetchJson("https://api.github.com/repos/w3c/csswg-drafts/contents/"))
    .filter(p => p.type === "dir" && !cssMetaDir.includes(p.path))
    .map(p => p.path);
  const svgSpecs = (await fetchJson("https://api.github.com/repos/w3c/svgwg/contents/specs"))
    .filter(p => p.type === "dir" && !svgMetaDir.includes(p.name))
    .map(p => p.path);
  const fxtfSpecs = (await fetchJson("https://api.github.com/repos/w3c/fxtf-drafts/contents/"))
    .filter(p => p.type === "dir" && !fxtfMetaDir.includes(p.path))
    .map(p => p.path);
  const houdiniSpecs = (await fetchJson("https://api.github.com/repos/w3c/css-houdini-drafts/contents/"))
    .filter(p => p.type === "dir" && !houdiniMetaDir.includes(p.path))
    .map(p => p.path);

  // ECMA proposals are in markdown pages on GitHub. We only watch stage 3
  // proposals, which are in the first table on the page.
  // Same thing for Web Assembly proposals: let's extract phase 3+ proposals.
  // (note GitHub wraps tables in <markdown-accessibility-table> elements and
  // headings in a <div class="markdown-heading"> element)
  // Both functions are evaluated in the page, not in this process.
  const extractEcmaStage3Proposals = _ =>
    [...document.querySelector("table").querySelectorAll("tr td:first-child a")]
      .map(a => a.href.split('#')[0]);
  // A table without a preceding heading wrapper is skipped instead of making
  // the whole extraction fail.
  const extractWasmProposals = _ =>
    [...document.querySelectorAll("table")]
      .filter(table => table.parentElement?.previousElementSibling?.querySelector('h3')?.textContent?.match(/Phase (3|4|5)/))
      .map(table => [...table.querySelectorAll("tr td:first-child a")].map(a => a.href.split('#')[0]))
      .flat();

  let ecmaProposals;
  let ecmaIntlProposals;
  let wasmProposals;
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://github.com/tc39/proposals/blob/main/README.md");
    ecmaProposals = await page.evaluate(extractEcmaStage3Proposals);
    await page.goto("https://github.com/tc39/proposals/blob/main/ecma402/README.md");
    ecmaIntlProposals = await page.evaluate(extractEcmaStage3Proposals);
    await page.goto("https://github.com/WebAssembly/proposals/blob/main/README.md");
    wasmProposals = await page.evaluate(extractWasmProposals);
  }
  finally {
    // Close the browser whatever happens, the process would not exit otherwise
    await browser.close();
  }

  const chromeFeatures = await fetchJson("https://www.chromestatus.com/features.json");

  const wgs = Object.values(groups).filter(g => g.type === "working group" && !nonBrowserSpecWgs.includes(g.name));
  const cgs = Object.values(groups).filter(g => g.type === "community group" && watchedBrowserCgs.includes(g.name));

  // WGs
  // * check repos with w3c.json/repo-type including rec-track
  // (repositories that a group references but that the report does not
  // describe are skipped, there is nothing to check for them)
  const wgRepos = wgs.map(g => g.repos.map(r => r.fullName)).flat()
    .map(fullName => repos.find(matchRepoName(fullName)))
    .filter(r => r);
  const recTrackRepos = wgRepos.filter(eitherFilter(hasRepoType('rec-track'), hasRepoType('registry')));

  // * look if those with homepage URLs have a match in the list of specs
  let candidates = keepUntracked(
    recTrackRepos.filter(r => r.homepageUrl).map(canonicalizeGhUrl));

  // * look if those without a homepage URL have a match with their generated URL
  const publishedWgCandidates = await Promise.all(
    keepUntracked(recTrackRepos.filter(r => !r.homepageUrl).map(toGhUrl))
      .map(hasPublishedContent));
  candidates = candidates.concat(publishedWgCandidates.filter(x => x));

  // Look which of the specRepos on recTrack from a browser-producing WG have no match
  candidates = candidates.concat(
    Object.keys(specRepos)
      .map(r => specRepos[r]
        .filter(s => s.recTrack && wgs.find(g => g.id === s.group))
        .map(s => ({repo: r, spec: canonicalizeTRUrl(s.url)})))
      .flat()
      .filter(hasUnknownTrSpec)
      .filter(isInScope)
  );

  // CGs
  // check repos with w3c.json/repo-type includes cg-report or with no w3c.json
  const cgRepos = cgs.map(g => g.repos.map(r => r.fullName)).flat()
    .map(fullName => repos.find(matchRepoName(fullName)))
    .filter(r => r);
  const cgSpecRepos = cgRepos.filter(r => !r.w3c || hasRepoType('cg-report')(r));

  // * look if those with homepage URLs have a match in the list of specs
  candidates = candidates.concat(keepUntracked(
    cgSpecRepos.filter(r => r.homepageUrl).map(canonicalizeGhUrl)));

  // for those without homepageUrl, check which have published content
  const publishedCandidates = (await Promise.all(
    keepUntracked(cgSpecRepos.filter(r => !r.homepageUrl).map(toGhUrl))
      .map(hasPublishedContent)
  )).filter(x => x);
  candidates = candidates.concat(publishedCandidates);

  // * look if those without homepage URLs but marked as a cg-report
  // have a match in the list of specs
  const monitorAdditions = keepUntracked(
    cgSpecRepos
      .filter(r => !r.homepageUrl && hasRepoType('cg-report')(r) &&
        !publishedCandidates.find(p => p.repo === `${r.owner.login}/${r.name}`))
      .map(toGhUrl))
    // we remove the spec field since we haven't found a usable url
    .map(c => ({repo: c.repo}));

  // Check for new WHATWG streams
  candidates = candidates.concat(keepUntracked(
    whatwgSpecs.map(s => ({repo: `whatwg/${s.id}`, spec: s.href}))));

  // Check for new CSS specs
  candidates = candidates.concat(keepUntracked(
    cssSpecs.map(s => ({repo: "w3c/csswg-drafts", spec: `https://drafts.csswg.org/${s}/`}))));

  // Check for new SVG specs
  candidates = candidates.concat(keepUntracked(
    svgSpecs.map(s => ({repo: "w3c/svgwg", spec: `https://svgwg.org/${s}/`}))));

  // Check for new FXTF specs
  candidates = candidates.concat(keepUntracked(
    fxtfSpecs.map(s => ({repo: "w3c/fxtf-drafts", spec: `https://drafts.fxtf.org/${s}/`}))));

  // Check for new Houdini specs
  candidates = candidates.concat(keepUntracked(
    houdiniSpecs.map(s => ({repo: "w3c/css-houdini-drafts", spec: `https://drafts.css-houdini.org/${s}/`}))));

  // Check for new TC39 Stage 3 proposals
  candidates = candidates.concat(keepUntracked(
    ecmaProposals.concat(ecmaIntlProposals).map(s => ({
      repo: s.replace('https://github.com/', ''),
      spec: s.replace('https://github.com/tc39/', 'https://tc39.es/').replace('https://github.com/tc39-transfer/', 'https://tc39.es/') + '/'
    }))));

  // Check for new WASM phase 3+ proposals
  candidates = candidates.concat(keepUntracked(
    wasmProposals.map(s => ({
      repo: s.replace('https://github.com/', ''),
      spec: s.replace(/^https:\/\/github.com\/WebAssembly\/([^/]+)/i, 'https://webassembly.github.io/$1/')
    }))));

  // Add information from Chrome Feature status
  // (`impl.chrome` is the id of the first feature whose spec URL starts with
  // the candidate URL, undefined when there is none)
  candidates = candidates.map(c => ({
    ...c,
    impl: {
      chrome: chromeFeatures.find(f => f.standards?.spec?.startsWith(c.spec))?.id
    }
  }));

  // Filter out specs that cannot be fetched (e.g., because the URL we computed
  // for the spec simply does not exist yet). A network-level failure, such as
  // a homepage on a domain that no longer resolves, also means that the spec
  // cannot be fetched: it must not abort the whole search.
  const fetchableCandidates = [];
  for (const candidate of candidates) {
    let exists = false;
    try {
      const response = await fetch(candidate.spec);
      exists = response.status === 200;
    }
    catch {
      exists = false;
    }
    if (exists) {
      fetchableCandidates.push(candidate);
    }
  }
  candidates = fetchableCandidates;

  for (const candidate of candidates) {
    try {
      candidate.shortname = computeShortname(candidate.spec).shortname;
    }
    catch {
      // Deliberate best-effort: the candidate is reported without a shortname
      // when none can be computed from its URL.
    }
  }

  // Candidates with a shortname come first, sorted by shortname. Candidates
  // without a shortname follow, sorted by URL.
  candidates.sort((c1, c2) => {
    if (c1.shortname && c2.shortname) {
      return c1.shortname.localeCompare(c2.shortname);
    }
    else if (c1.shortname) {
      return -1;
    }
    else if (c2.shortname) {
      return 1;
    }
    else {
      return c1.spec.localeCompare(c2.spec);
    }
  });

  return {
    additions: candidates,
    monitor: monitorAdditions
  };
}
/**
 * Parse the value of the `--max` command-line option.
 *
 * @param {string} value Raw option value.
 * @returns {number} The integer that the value starts with, in base 10.
 * @throws {Error} If no integer can be parsed from the value.
 */
function parseMaxOption(value) {
  const max = Number.parseInt(value, 10);
  if (!Number.isNaN(max)) {
    return max;
  }
  throw new Error('The `--max` option value must be a number.');
}
/*****************************************************************************
 * Main loop, create the CLI using Commander.
 *
 * The command runs `findSpecs`, prints the candidates as a markdown list and,
 * when the `--github` option is set, creates one issue per candidate spec
 * through the `gh` CLI, unless an open "new spec" issue already mentions the
 * spec URL. At most `--max` issues get created (0 means no limit).
 *****************************************************************************/
const program = new Command();
program
  .name('find-specs')
  .version(version)
  .description('Find candidate specs that could be worth adding to the main list (`specs.json`).')
  .option('-g, --github', 'report candidates to the `w3c/browser-specs` GitHub repository. The command will create one issue per candidate spec.')
  .option('-m, --max <number>', 'set the maximum number of issues to create. The option is only meaningful when the `--github` option is set. Default value is 5. Set the option to 0 to report all candidate specs.', parseMaxOption, 5)
  .option('-r, --repos', 'report candidate repositories with no published content as well.')
  .addHelpText('after', `
Output:
- The command reports a list of candidates for addition.
- Additionally, if the \`--github\` option is set, the command also reports these candidates as issues opened against the \`w3c/browser-specs\` repository.
Notes:
- The command only creates an issue if there is no open issue that already suggests adding the spec.
Examples:
$ find-specs
$ find-specs --github --max 3
`)
  .action(async (options) => {
    // Same link in the console report and in the issues that get created
    const chromeStatusLink = id =>
      `[chrome status](https://www.chromestatus.com/features/${id})`;

    const candidates = await findSpecs();
    if (candidates.additions.length + candidates.monitor.length === 0) {
      console.log('No candidate specs found');
      return;
    }

    if (candidates.additions.length > 0) {
      console.log("New candidate specs that may be worth adding:");
      for (const c of candidates.additions) {
        const specName = c.shortname ? `[${c.shortname}](${c.spec})` : c.spec;
        const repoName = `[${c.repo}](https://github.com/${c.repo})`;
        const chromeLink = c.impl?.chrome ? ` ${chromeStatusLink(c.impl.chrome)}` : "";
        console.log(`- ${specName} from ${repoName}${chromeLink}`);
      }
    }

    if (options.repos && candidates.monitor.length > 0) {
      if (candidates.additions.length > 0) {
        console.log();
      }
      console.log("Non-monitored repositories without published content:");
      for (const {repo} of candidates.monitor) {
        console.log(`- [${repo}](https://github.com/${repo})`);
      }
    }

    if (!options.github) {
      return;
    }

    console.log();
    let issuesStr;
    try {
      // Run from the repository folder, as done to create issues, so that
      // `gh` targets the same repository in both cases
      issuesStr = execSync(`gh issue list --label "new spec" --json body,number`, execParams);
    }
    catch {
      console.log(`Could not retrieve open issues from w3c/browser-specs repository.`);
      process.exit(1);
    }
    const issues = JSON.parse(issuesStr);

    const bodyFile = path.join(scriptPath, "..", "__issue.md");
    let created = 0;
    for (const candidate of candidates.additions) {
      if (issues.some(issue => issue.body.includes(candidate.spec))) {
        // Skip as there's already an issue opened for that candidate spec
        continue;
      }

      // Important: the issue body must match the `suggest-spec.yml` issue
      // template. There is unfortunately no easy way to create an issue out
      // of such a template directly.
      const title = `Add ${candidate.shortname ?? candidate.spec}`;
      const comments = [
        `- See repository: [${candidate.repo}](https://github.com/${candidate.repo})`,
        candidate.impl?.chrome ? `- ${chromeStatusLink(candidate.impl.chrome)}` : null,
        candidate.shortname ? `- Would-be shortname: \`${candidate.shortname}\`` : null
      ].filter(comment => !!comment);
      const body = `### URL
${candidate.spec}
### Rationale
${comments.join("\n")}
### Additional properties
\`\`\`json
{}
\`\`\`
`;

      try {
        await fs.writeFile(bodyFile, body, 'utf8');
        // The title derives from external data (spec URL or shortname): pass
        // arguments as an array, without a shell, so that no character in
        // there can be interpreted as shell syntax.
        execFileSync('gh', [
          'issue', 'create',
          '--label', 'new spec',
          '--title', title,
          '--body-file', bodyFile
        ], execParams);
      }
      finally {
        // Do not leave the temporary file behind, even when `gh` fails
        await fs.rm(bodyFile, { force: true });
      }

      created++;
      if (options.max > 0 && created >= options.max) {
        // Stop as soon as the maximum number of issues has been created
        break;
      }
    }
  });
program.parseAsync(process.argv);