diff --git a/README.md b/README.md index 0f2439663..3807d5ea4 100644 --- a/README.md +++ b/README.md @@ -243,6 +243,7 @@ The format of harvested data is tool-specific. Tool output is stored in the tool - composer - rubygem - deb +- conda ## Provider Registry @@ -258,6 +259,9 @@ The format of harvested data is tool-specific. Tool output is stored in the tool - packagist.org - proxy.golang.org - ftp.debian.org +- repo.anaconda.com/pkgs/main (anaconda-main) +- repo.anaconda.com/pkgs/r (anaconda-r) +- conda.anaconda.org/conda-forge (conda-forge) ## Tool Name Registry diff --git a/app.js b/app.js index c3e3e6574..e0419b053 100644 --- a/app.js +++ b/app.js @@ -184,6 +184,7 @@ function createApp(config) { app.use('/', require('./routes/index')(config.buildsha)) app.use('/origins/github', require('./routes/originGitHub')()) app.use('/origins/crate', require('./routes/originCrate')()) + app.use('/origins/conda', require('./routes/originConda')()) app.use('/origins/pod', require('./routes/originPod')()) app.use('/origins/npm', require('./routes/originNpm')()) app.use('/origins/maven', require('./routes/originMaven')()) diff --git a/business/statsService.js b/business/statsService.js index 8167713a1..0276d1dc8 100644 --- a/business/statsService.js +++ b/business/statsService.js @@ -35,6 +35,8 @@ class StatsService { _getStatLookup() { return { total: () => this._getType('total'), + conda: () => this._getType('conda'), + condasrc: () => this._getType('condasrc'), crate: () => this._getType('crate'), gem: () => this._getType('gem'), git: () => this._getType('git'), diff --git a/docs/determining-declared-license.md b/docs/determining-declared-license.md index 731d5de65..ded8128c5 100644 --- a/docs/determining-declared-license.md +++ b/docs/determining-declared-license.md @@ -109,3 +109,8 @@ * source: https://cocoapods.org/ * The service then sets the declared license based on the registry information * The ClearlyDefined summarizer pulls registry information from 
'https://raw.githubusercontent.com/CocoaPods/Specs/master + +### conda +* source: conda-forge (https://conda.anaconda.org/conda-forge), anaconda-main (https://repo.anaconda.com/pkgs/main), or anaconda-r (https://repo.anaconda.com/pkgs/r) +* The crawler gets registry information from https://conda.anaconda.org/conda-forge +* The ClearlyDefined summarizer sets the declared license to the license(s) in the registry information diff --git a/lib/licenseMatcher.js b/lib/licenseMatcher.js index a4316068c..582576852 100644 --- a/lib/licenseMatcher.js +++ b/lib/licenseMatcher.js @@ -111,6 +111,10 @@ class HarvestLicenseMatchPolicy { switch (type) { case 'maven': return new BaseHarvestLicenseMatchStrategy('maven', ['manifest.summary.licenses']) + case 'conda': + return new BaseHarvestLicenseMatchStrategy('conda', ['declaredLicenses']) + case 'condasrc': + return new BaseHarvestLicenseMatchStrategy('condasrc', ['declaredLicenses']) case 'crate': return new BaseHarvestLicenseMatchStrategy('crate', ['registryData.license']) case 'pod': diff --git a/providers/summary/clearlydefined.js b/providers/summary/clearlydefined.js index 13278b551..7f2f47606 100644 --- a/providers/summary/clearlydefined.js +++ b/providers/summary/clearlydefined.js @@ -20,6 +20,12 @@ const mavenBasedUrls = { gradleplugin: 'https://plugins.gradle.org/m2' } +const condaChannels = { + 'anaconda-main': 'https://repo.anaconda.com/pkgs/main', + 'anaconda-r': 'https://repo.anaconda.com/pkgs/r', + 'conda-forge': 'https://conda.anaconda.org/conda-forge' +} + class ClearlyDescribedSummarizer { constructor(options) { this.options = options @@ -44,6 +50,12 @@ class ClearlyDescribedSummarizer { case 'npm': this.addNpmData(result, data, coordinates) break + case 'conda': + this.addCondaData(result, data, coordinates) + break + case 'condasrc': + this.addCondaSrcData(result, data, coordinates) + break case 'crate': this.addCrateData(result, data, coordinates) break @@ -191,6 +203,23 @@ class ClearlyDescribedSummarizer { if (licenses.length) setIfValue(result, 'licensed.declared', 
SPDX.normalize(licenses.join(' OR '))) }
+  // Summarizes a Conda binary package; its download URL is read from registryData.downloadUrl.
+  addCondaData(result, data, coordinates) {
+    this._addCondaCommonData(result, data, coordinates, 'registryData.downloadUrl')
+  }
+
+  // Summarizes a Conda source package; its download URL is read from registryData.channelData.source_url.
+  addCondaSrcData(result, data, coordinates) {
+    this._addCondaCommonData(result, data, coordinates, 'registryData.channelData.source_url')
+  }
+
+  // Fields shared by both Conda types. `downloadUrlPath` is the path inside `data` holding the download URL.
+  // The registry URL is only set for providers listed in condaChannels: `new URL('undefined')` throws, and an
+  // unrecognized provider should leave the field unset rather than abort the whole summary.
+  _addCondaCommonData(result, data, coordinates, downloadUrlPath) {
+    const channelUrl = condaChannels[coordinates.provider]
+    setIfValue(result, 'described.releaseDate', extractDate(get(data, 'releaseDate')))
+    setIfValue(result, 'described.urls.download', get(data, downloadUrlPath))
+    setIfValue(result, 'described.urls.registry', channelUrl ? new URL(channelUrl).href : undefined)
+    setIfValue(result, 'described.projectWebsite', get(data, 'registryData.channelData.home'))
+    setIfValue(result, 'licensed.declared', SPDX.normalize(get(data, 'declaredLicenses')))
+  }
+
addCrateData(result, data, coordinates) { setIfValue(result, 'described.releaseDate', extractDate(get(data, 'registryData.created_at'))) setIfValue(result, 'described.projectWebsite', get(data, 'manifest.homepage')) diff --git a/routes/originConda.js b/routes/originConda.js new file mode 100644 index 000000000..d978a6c4c --- /dev/null +++ b/routes/originConda.js @@ -0,0 +1,101 @@ +// Copyright (c) Microsoft Corporation and others. Licensed under the MIT license. 
+// SPDX-License-Identifier: MIT
+
+const asyncMiddleware = require('../middleware/asyncMiddleware')
+const router = require('express').Router()
+const requestPromise = require('request-promise-native')
+const { uniq } = require('lodash')
+const { Cache } = require('memory-cache')
+const condaChannels = {
+  'anaconda-main': 'https://repo.anaconda.com/pkgs/main',
+  'anaconda-r': 'https://repo.anaconda.com/pkgs/r',
+  'conda-forge': 'https://conda.anaconda.org/conda-forge'
+}
+const condaCache = new Cache()
+const cacheDuration = 8 * 60 * 60 * 1000 // 8 hours
+
+// Own-property test, so path parameters such as `constructor` never match members inherited from Object.
+function hasOwn(object, key) {
+  return Object.prototype.hasOwnProperty.call(object, key)
+}
+
+// GETs `url` as JSON and keeps the parsed body in condaCache under `key` for cacheDuration.
+async function fetchCachedJson(key, url) {
+  let payload = condaCache.get(key)
+  if (!payload) {
+    payload = await requestPromise({ url, method: 'GET', json: true })
+    condaCache.put(key, payload, cacheDuration)
+  }
+  return payload
+}
+
+// Channel-wide channeldata.json; the routes below read its `packages` and `subdirs` members.
+async function fetchCondaChannelData(channel) {
+  return fetchCachedJson(`${channel}-channelData`, `${condaChannels[channel]}/channeldata.json`)
+}
+
+// Per-subdir repodata.json; the revisions route reads its `packages` and `packages.conda` members.
+async function fetchCondaRepoData(channel, subdir) {
+  return fetchCachedJson(`${channel}-${subdir}-repoData`, `${condaChannels[channel]}/${subdir}/repodata.json`)
+}
+
+// Returns `subdir:version-build` for each entry of one repodata section whose name equals `name`.
+// A missing section yields an empty list.
+function listRevisions(section, subdir, name) {
+  return Object.values(section || {})
+    .filter(packageData => packageData.name === name)
+    .map(packageData => `${subdir}:${packageData.version}-${packageData.build}`)
+}
+
+// Lists the revisions of one package in a channel.
+// A subdir of `-` means every subdir that the channel data lists for the package.
+router.get(
+  '/:channel/:subdir/:name/revisions',
+  asyncMiddleware(async (request, response) => {
+    const channel = encodeURIComponent(request.params.channel)
+    const subdir = encodeURIComponent(request.params.subdir)
+    // Must come from the :name parameter. Encoding the channel here would make every request
+    // look up a package named after the channel.
+    const name = encodeURIComponent(request.params.name)
+    if (!hasOwn(condaChannels, channel)) {
+      return response.status(404).send(`Unrecognized Conda channel ${channel}`)
+    }
+    const channelData = await fetchCondaChannelData(channel)
+    if (!hasOwn(channelData.packages, name)) {
+      return response.status(404).send(`Package ${name} not found in Conda channel ${channel}`)
+    }
+    if (subdir !== '-' && !channelData.subdirs.includes(subdir)) {
+      return response.status(404).send(`Subdir ${subdir} is non-existent in Conda channel ${channel}`)
+    }
+    const subdirs = subdir === '-' ? channelData.packages[name].subdirs : [subdir]
+    let revisions = []
+    // Fetched one subdir at a time, as before; each repodata.json is cached after its first download.
+    for (const currentSubdir of subdirs) {
+      const repoData = await fetchCondaRepoData(channel, currentSubdir)
+      revisions = revisions.concat(
+        listRevisions(repoData['packages'], currentSubdir, name),
+        listRevisions(repoData['packages.conda'], currentSubdir, name)
+      )
+    }
+    return response.status(200).send(uniq(revisions))
+  })
+)
+
+// Package search: returns `{ id }` for every package in the channel whose name contains `name`.
+router.get(
+  '/:channel/:name',
+  asyncMiddleware(async (request, response) => {
+    const channel = encodeURIComponent(request.params.channel)
+    const name = encodeURIComponent(request.params.name)
+    if (!hasOwn(condaChannels, channel)) {
+      return response.status(404).send([])
+    }
+    const channelData = await fetchCondaChannelData(channel)
+    const matches = Object.keys(channelData.packages)
+      .filter(packageName => packageName.includes(name))
+      .map(packageName => ({ id: packageName }))
+    return response.status(200).send(matches)
+  })
+)
+
+function setup() {
+  return router
+}
+
+module.exports = setup
\ No newline at end of file
diff --git a/schemas/coordinates-1.0.json b/schemas/coordinates-1.0.json index 023ca4012..ffb996b85 100644 --- a/schemas/coordinates-1.0.json +++ b/schemas/coordinates-1.0.json @@ -13,6 +13,8 @@ "type": { "enum": [ "npm", + "conda", + "condasrc", "crate", "git", "maven", @@ -32,8 +34,11 @@ }, "provider": { "enum": [ + "anaconda-main", + "anaconda-r", "npmjs", "cocoapods", + "conda-forge", "cratesio", "github", "gitlab", diff --git a/schemas/curation-1.0.json b/schemas/curation-1.0.json index 2bd32a9b1..eeda81552 100644 --- a/schemas/curation-1.0.json +++ b/schemas/curation-1.0.json @@ -24,6 +24,8 @@ 
"type": "string", "enum": [ "npm", + "conda", + "condasrc", "crate", "git", "maven", @@ -45,8 +47,11 @@ "provider": { "type": "string", "enum": [ + "anaconda-main", + "anaconda-r", "npmjs", "cocoapods", + "conda-forge", "cratesio", "github", "gitlab", diff --git a/schemas/curations-1.0.json b/schemas/curations-1.0.json index d520d53f4..fec1f048c 100644 --- a/schemas/curations-1.0.json +++ b/schemas/curations-1.0.json @@ -26,6 +26,8 @@ "type": "string", "enum": [ "npm", + "conda", + "condasrc", "crate", "git", "go", @@ -47,8 +49,11 @@ "provider": { "type": "string", "enum": [ + "anaconda-main", + "anaconda-r", "npmjs", "cocoapods", + "conda-forge", "cratesio", "github", "gitlab", diff --git a/schemas/definition-1.0.json b/schemas/definition-1.0.json index f492f2cd6..4383b13df 100644 --- a/schemas/definition-1.0.json +++ b/schemas/definition-1.0.json @@ -32,6 +32,8 @@ "type": { "enum": [ "npm", + "conda", + "condasrc", "crate", "git", "maven", @@ -48,8 +50,11 @@ }, "provider": { "enum": [ + "anaconda-main", + "anaconda-r", "npmjs", "cocoapods", + "conda-forge", "cratesio", "github", "gitlab", @@ -471,4 +476,4 @@ } } } -} \ No newline at end of file +} diff --git a/schemas/swagger.yaml b/schemas/swagger.yaml index 441ac4566..a0d031c28 100644 --- a/schemas/swagger.yaml +++ b/schemas/swagger.yaml @@ -423,6 +423,7 @@ components: example: - git/github/microsoft/redie/194269b5b7010ad6f8dc4ef608c88128615031ca - npm/npmjs/-/redie/0.3.0 + - conda/conda-forge/linux-64/21cmfast/3.1.1-py36 noticeFile: type: object @@ -531,6 +532,8 @@ components: type: string enum: - composer + - conda + - condasrc - crate - deb - debsrc @@ -551,7 +554,10 @@ components: schema: type: string enum: + - anaconda-main + - anaconda-r - cocoapods + - conda-forge - cratesio - debian - github @@ -568,7 +574,7 @@ components: name: namespace in: path required: true - description: many component systems have namespaces. GitHub orgs, NPM namespace, Maven group id, ... This segment must be supplied. 
If your component does not have a namespace, use '-' (ASCII hyphen). + description: many component systems have namespaces. GitHub orgs, NPM namespace, Maven group id, Conda Subdir/Architecture ... This segment must be supplied. If your component does not have a namespace, use '-' (ASCII hyphen). schema: type: string name: diff --git a/test/summary/clearlydefinedTests.js b/test/summary/clearlydefinedTests.js index 1bf1a50fb..31c99f81e 100644 --- a/test/summary/clearlydefinedTests.js +++ b/test/summary/clearlydefinedTests.js @@ -87,6 +87,71 @@ describe('ClearlyDescribedSummarizer add files', () => { }) })
+// Conda binary packages: the summarizer reads the license and release date from the top-level
+// `declaredLicenses` / `releaseDate` fields, the website from registryData.channelData and the
+// registry URL from the channel named by the coordinates' provider.
+describe('ClearlyDescribedSummarizer addCondaData', () => {
+  const condaTestCoordinates = EntityCoordinates.fromString('conda/conda-forge/-/test/1.0')
+  it('declares license from declaredLicenses', () => {
+    let result = {}
+    summarizer.addCondaData(result, { declaredLicenses: 'MIT' }, condaTestCoordinates)
+    assert.strictEqual(get(result, 'licensed.declared'), 'MIT')
+  })
+
+  it('declares dual license from declaredLicenses with SPDX expression', () => {
+    let result = {}
+    let data = setup([{ path: 'LICENSE-MIT', license: 'MIT' }, { path: 'LICENSE-APACHE', license: 'Apache-2.0' }])
+    data.declaredLicenses = 'MIT OR Apache-2.0'
+    summarizer.addCondaData(result, data, condaTestCoordinates)
+    assert.strictEqual(get(result, 'licensed.declared'), 'MIT OR Apache-2.0')
+  })
+
+  it('normalizes to spdx only', () => {
+    let result = {}
+    summarizer.addCondaData(result, { declaredLicenses: 'Garbage' }, condaTestCoordinates)
+    assert.strictEqual(get(result, 'licensed.declared'), 'NOASSERTION')
+  })
+
+  it('describes projectWebsite from registryData', () => {
+    let result = {}
+    summarizer.addCondaData(result, {
+      registryData: {
+        channelData: { home: 'https://github.com/owner/repo' }
+      }
+    }, condaTestCoordinates)
+    assert.strictEqual(result.described.projectWebsite, 'https://github.com/owner/repo')
+  })
+
+  it('describes releaseDate from the harvested releaseDate', () => {
+    let result = {}
+    summarizer.addCondaData(
+      result,
+      { releaseDate: 'Wed, 14 Jun 2017 07:00:00 GMT' },
+      condaTestCoordinates
+    )
+    assert.strictEqual(result.described.releaseDate, '2017-06-14')
+  })
+
+  it('describes registry url from the channel', () => {
+    let result = {}
+    summarizer.addCondaData(result, {}, condaTestCoordinates)
+    assert.strictEqual(result.described.urls.registry, 'https://conda.anaconda.org/conda-forge')
+  })
+})
+
+// Conda source packages take their download URL from the channel data's source_url.
+describe('ClearlyDescribedSummarizer addCondaSrcData', () => {
+  const condaSrcTestCoordinates = EntityCoordinates.fromString('condasrc/conda-forge/-/test/1.0')
+  it('describes download url from channelData source_url', () => {
+    let result = {}
+    summarizer.addCondaSrcData(
+      result,
+      { registryData: { channelData: { source_url: 'https://example.com/test-1.0.tar.gz' } } },
+      condaSrcTestCoordinates
+    )
+    assert.strictEqual(result.described.urls.download, 'https://example.com/test-1.0.tar.gz')
+  })
+})
+
describe('ClearlyDescribedSummarizer addCrateData', () => { const crateTestCoordinates = EntityCoordinates.fromString('crate/cratesio/-/test/1.0') it('declares license from registryData', () => {