Skip to content

Commit

Permalink
feat: basic functionality
Browse files Browse the repository at this point in the history
run queries for all swMATH links
  • Loading branch information
physikerwelt committed Nov 28, 2019
1 parent 7f55630 commit ed8d47f
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 21 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,5 @@ typings/
.sonarlint/

.idea/

swh.cache/
12 changes: 8 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "template-typescript-package",
"name": "swh-swmath-save-now-batch",
"version": "0.0.0-development",
"description": "A boilerplate repo for publishing typescript packages to npm",
"description": "A simple script to batch query the software heritage save now api with swMATH data",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
Expand Down Expand Up @@ -41,8 +41,10 @@
"devDependencies": {
"@commitlint/cli": "^8.2.0",
"@commitlint/config-conventional": "^8.2.0",
"@types/cacache": "^12.0.1",
"@types/jest": "^24.0.23",
"@types/node": "^12.12.12",
"@types/node-fetch": "^2.5.4",
"@typescript-eslint/eslint-plugin": "^2.9.0",
"@typescript-eslint/parser": "^2.9.0",
"eslint": "^6.7.1",
Expand All @@ -61,8 +63,10 @@
"dist/**/*"
],
"dependencies": {
"@types/node-fetch": "^2.5.4",
"cacache": "^13.0.1",
"fast-csv": "^3.4.0",
"node-fetch": "^2.6.0"
"loglevel": "^1.6.6",
"node-fetch": "^2.6.0",
"p-queue": "^6.2.1"
}
}
7 changes: 0 additions & 7 deletions src/__tests__/greet.test.ts

This file was deleted.

11 changes: 11 additions & 0 deletions src/__tests__/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { swh, deleteCacheEntry, processLink } from '..'
import * as log from 'loglevel'

// Default log level for the test run.
log.setDefaultLevel('info')
// Allow 5 min to process all links
jest.setTimeout(300000)

// A single made-up entry (id 0) used to exercise the per-link code path.
const pseudoId = 0
const pseudoLink = 'https://github.com/fairmath/SwhSaveNowBatch.git'

// First call, second call for the same id, then removal of the cache entry.
test('Process pseudo-link', async () => processLink(pseudoId, pseudoLink))
test('Process pseudo-link again', async () => processLink(pseudoId, pseudoLink))
test('Delete pseudo-link', async () => deleteCacheEntry(pseudoId))

// Full run over the complete list.
test('Get swh list again', async () => swh())
44 changes: 34 additions & 10 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,49 @@
import { Response } from 'node-fetch'
import csv = require('fast-csv')
import CsvParserStream from 'fast-csv/build/src/parser/CsvParserStream'

import cacach = require('cacache')
import fetch from 'node-fetch'
import log = require('loglevel')
import PQueue from 'p-queue'

const cachePath = './swh.cache'

/**
 * Queries archive.softwareheritage.org for one link and caches the response
 * body on disk under the link's id.
 *
 * The cache (at `cachePath`) is consulted first; only when the lookup fails is
 * the HTTP request made and its body stored. Both paths resolve with the
 * response body as a string, so the first and every later call for the same
 * id yield the same kind of value.
 *
 * @param id - identifier of the link; its decimal string form is the cache key
 * @param url - repository URL, interpolated as-is into the request path
 * @returns the (possibly cached) response body text
 * @throws rejects when the HTTP request or the cache write fails
 */
export async function processLink(id: number, url: string): Promise<boolean | string> {
  log.trace('processing', { id: id, url: url })
  const cacheKey = `${id}`
  try {
    const cached = await cacach.get(cachePath, cacheKey)
    // cacache hands back the stored bytes; convert so hits match the declared type.
    return cached.data.toString()
  } catch {
    // Any lookup failure is treated as a cache miss and falls through to the request.
  }
  const res: Response = await fetch(`https://archive.softwareheritage.org/api/1/git/url/${url}/`)
  const text = await res.text()
  // put() resolves with the entry's integrity hash, which is not what callers want back.
  await cacach.put(cachePath, cacheKey, text)
  return text
}

/**
 * Pipes the body of `res` through a semicolon-delimited CSV parser and
 * schedules one `processLink(row[0], row[1])` job per row on a queue that runs
 * at most 10 jobs at a time.
 *
 * @param res - response whose body is parsed as CSV
 * @returns resolves with the queue once the parser has emitted `end`; jobs may
 *          still be queued or running at that point, so the caller decides how
 *          to wait for them
 * @throws rejects with the parser's error if the stream emits `error`
 */
function parseCSV(res: Response): Promise<PQueue> {
  return new Promise((resolve, reject) => {
    // pipe() ends the parser when the body ends, so no explicit end() call is needed.
    const stream = res.body.pipe(csv.parse({ delimiter: ';', ignoreEmpty: true }))
    const queue = new PQueue({ concurrency: 10 })
    let count = 0
    queue.on('active', () => {
      log.debug(`Working on item #${++count}. Size: ${queue.size} Pending: ${queue.pending}`)
    })
    stream
      .on(
        'error',
        /* istanbul ignore next */ (error: Error) => {
          console.error(error)
          // Without this the returned promise would never settle after a parse failure.
          reject(error)
        },
      )
      .on('data', (row: [number, string]) => {
        queue
          .add(() => processLink(row[0], row[1]))
          // One failing link must not surface as an unhandled rejection.
          .catch(
            /* istanbul ignore next */ (error: Error) =>
              log.warn(`Processing of link #${row[0]} failed`, error),
          )
      })
      .on('end', (rowCount: number) => {
        log.info(`Received ${rowCount} rows from swMATH`)
        resolve(queue)
      })
  })
}

/**
 * Builds a greeting for the given name.
 *
 * @param name - text appended after "Hello "
 * @returns the greeting string
 */
export const greet = (name: string) => {
  return 'Hello ' + name
}

/**
 * Downloads the link list from http://swmath.org/SWH/, hands it to `parseCSV`,
 * and waits until the queue it returns has gone idle.
 *
 * @returns resolves once 'Processing finished' has been logged
 */
export const swh = (): Promise<void> =>
  fetch('http://swmath.org/SWH/')
    .then(parseCSV)
    // parseCSV resolves with the work queue; wait for it to drain before reporting.
    .then((q) => q.onIdle().then(() => log.info('Processing finished')))

/**
 * Removes the cache entry stored for the given id from the cache at `cachePath`.
 *
 * @param id - identifier whose decimal string form is the cache key
 * @returns whatever `cacach.rm.entry` returns for that key
 */
export const deleteCacheEntry = (id: number) => {
  const cacheKey = `${id}`
  return cacach.rm.entry(cachePath, cacheKey)
}

0 comments on commit ed8d47f

Please sign in to comment.