Skip to content

Commit

Permalink
removed condasrc from packages and improved tests
Browse files (browse the repository at this point in the history)
fixed checksum

removed redundant check

fixed checksum

fixed tests

added more tests

fix test

improved tests

update

updated tests

fixed test

refactoring

update
  • Loading branch information
Basit Ayantunde committed Jan 22, 2024
1 parent eb8de7b commit 0408e96
Show file tree
Hide file tree
Showing 10 changed files with 327 additions and 178 deletions.
12 changes: 0 additions & 12 deletions config/map.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,6 @@ const conda = {
fossology
}

const condasrc = {
_type: 'condasrc',
source,
clearlydefined,
licensee,
reuse,
scancode,
fossology
}

const crate = {
_type: 'crate',
source,
Expand Down Expand Up @@ -148,7 +138,6 @@ const _package = {
_type: 'package',
npm,
conda,
condasrc,
crate,
deb,
go,
Expand Down Expand Up @@ -179,7 +168,6 @@ const entities = {
reuse,
npm,
conda,
condasrc,
crate,
deb,
go,
Expand Down
128 changes: 36 additions & 92 deletions providers/fetch/condaFetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ class CondaFetch extends AbstractFetch {
'anaconda-r': 'https://repo.anaconda.com/pkgs/r',
'conda-forge': 'https://conda.anaconda.org/conda-forge'
}
this.headers = {
'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)'
}
this.CACHE_DURATION = 8 * 60 * 60 * 1000 // 8 hours
}

canHandle(request) {
const spec = this.toSpec(request)
return spec && this.channels[spec.provider]
return spec && !!(this.channels[spec.provider])
}

// {type: conda|condasrc}/{provider: anaconda-main|anaconda-r|conda-forge}/{architecture|-}/{package name}/[{version | _}]-[{build version | _}]/
Expand All @@ -33,9 +37,6 @@ class CondaFetch extends AbstractFetch {
// conda/conda-forge/-/numpy/_-_
async handle(request) {
const spec = this.toSpec(request)
if (!this.channels[spec.provider]) {
return request.markSkip(`Unrecognized conda provider: ${spec.provider}, must be either of: ${Object.keys(this.channels)}`)
}
if (spec.type !== 'conda' && spec.type !== 'condasrc') {
return request.markSkip('spec type must either be conda or condasrc')
}
Expand Down Expand Up @@ -92,10 +93,10 @@ class CondaFetch extends AbstractFetch {
return request
}

_matchPackage(spec, version, buildVersion, repoData) {
_matchPackage(name, version, buildVersion, repoData) {
let packageRepoEntries = []
let packageMatches = ([, packageData]) => {
return packageData.name === spec.name && ((!version) || version === '_' || version === packageData.version)
return packageData.name === name && ((!version) || version === '_' || version === packageData.version)
&& ((!buildVersion) || buildVersion === '_' || packageData.build.startsWith(buildVersion))
}
if (repoData['packages']) {
Expand All @@ -108,24 +109,12 @@ class CondaFetch extends AbstractFetch {
.filter(packageMatches)
.map(([packageFile, packageData]) => { return { packageFile, packageData } }))
}
packageRepoEntries.sort((a, b) => {
if (a.packageData.build < b.packageData.build) {
return 1
} else if (a.packageData.build === b.packageData.build) {
return 0
}
else {
return -1
}
})
packageRepoEntries.sort((a, b) => b.packageData.build.localeCompare(a.packageData.build))
return packageRepoEntries
}

async _downloadCondaPackage(spec, request, version, buildVersion, architecture, packageChannelData) {
if (packageChannelData.subdirs.length === 0) {
return request.markSkip('No architecture build in package channel data')
}
if (!architecture || architecture === '-') {
if (!architecture || architecture === '-' && packageChannelData.subdirs.length > 0) {
// prefer no-arch if available
architecture = packageChannelData.subdirs.includes('noarch') ? 'noarch' : packageChannelData.subdirs[0]
this.logger.info(`No binary architecture specified for ${spec.name}, using architecture: ${architecture}`)
Expand All @@ -138,7 +127,7 @@ class CondaFetch extends AbstractFetch {
if (!repoData) {
return request.markSkip(`failed to fetch and parse repodata json file for channel ${spec.provider} in architecture ${architecture}`)
}
let packageRepoEntries = this._matchPackage(spec, version, buildVersion, repoData)
let packageRepoEntries = this._matchPackage(spec.name, version, buildVersion, repoData)
if (packageRepoEntries.length == 0) {
return request.markSkip(`Missing package with matching spec (version: ${version}, buildVersion: ${buildVersion}) in ${architecture} repository`)
}
Expand Down Expand Up @@ -167,91 +156,46 @@ class CondaFetch extends AbstractFetch {
}

async _downloadPackage(downloadUrl, destination) {
return new Promise(
(resolve, reject) => {
const options = {
url: downloadUrl,
headers: {
'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)'
}
}
nodeRequest.get(options, (error, response) => {
if (error) {
return reject(error)
}
if (response.statusCode !== 200) {
return reject(new Error(`${response.statusCode} ${response.statusMessage}`))
}
}).pipe(fs.createWriteStream(destination).on('finish', () =>
resolve()
))
}
)
return new Promise((resolve, reject) => {
const options = { url: downloadUrl, headers: this.headers }
nodeRequest.get(options, (error, response) => {
if (error) return reject(error)
if (response.statusCode !== 200) return reject(new Error(`${response.statusCode} ${response.statusMessage}`))
}).pipe(fs.createWriteStream(destination).on('finish', () => resolve()))
})
}

async _cachedDownload(cacheKey, sourceUrl, cacheDuration, fileDstLocation) {
if (!memCache.get(cacheKey)) {
return new Promise(
(resolve, reject) => {
const options = {
url: sourceUrl,
headers: {
'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)'
}
}
nodeRequest.get(options, (error, response) => {
if (error) {
return reject(error)
}
if (response.statusCode !== 200) {
return reject(new Error(`${response.statusCode} ${response.statusMessage}`))
}
}).pipe(fs.createWriteStream(fileDstLocation).on('finish', () => {
memCache.put(cacheKey, true, cacheDuration)
this.logger.info(
`Conda: retrieved ${sourceUrl}. Stored channel data file at ${fileDstLocation}`
)
return resolve()
}))
}
)
return new Promise((resolve, reject) => {
const options = { url: sourceUrl, headers: this.headers }
nodeRequest.get(options, (error, response) => {
if (error) return reject(error)
if (response.statusCode !== 200) return reject(new Error(`${response.statusCode} ${response.statusMessage}`))
}).pipe(fs.createWriteStream(fileDstLocation).on('finish', () => {
memCache.put(cacheKey, true, cacheDuration)
this.logger.info(`Conda: retrieved ${sourceUrl}. Stored data file at ${fileDstLocation}`)
return resolve()
}))
})
}
}

async getChannelData(condaChannelUrl, condaChannelID) {
// ~10MB file, needs to be cached
let channelDataFile = {
url: `${condaChannelUrl}/channeldata.json`,
cacheKey: `${condaChannelID}-channelDataFile`,
cacheDuration: 8 * 60 * 60 * 1000,// 8 hours
fileLocation: `${this.packageMapFolder}/${condaChannelID}-channelDataFile.json`
}
async _fetchCachedJSONFile(cacheKey, url, cacheDuration, fileLocation) {
try {
await this._cachedDownload(channelDataFile.cacheKey, channelDataFile.url,
channelDataFile.cacheDuration, channelDataFile.fileLocation)
await this._cachedDownload(cacheKey, url, cacheDuration, fileLocation)
} catch (error) {
return null
}
let fileText = fs.readFileSync(channelDataFile.fileLocation)
return JSON.parse(fileText)
return JSON.parse(fs.readFileSync(fileLocation))
}

async getChannelData(condaChannelUrl, condaChannelID) {
return await this._fetchCachedJSONFile(`${condaChannelID}-channelDataFile`, `${condaChannelUrl}/channeldata.json`, this.CACHE_DURATION, `${this.packageMapFolder}/${condaChannelID}-channelDataFile.json`)
}

async getRepoData(condaChannelUrl, condaChannelID, architecture) {
// ~30MB file, needs to be cached
let repoFile = {
url: `${condaChannelUrl}/${architecture}/repodata.json`,
cacheKey: `${condaChannelID}-repoDataFile-${architecture}`,
cacheDuration: 8 * 60 * 60 * 1000,// 8 hours
fileLocation: `${this.packageMapFolder}/${condaChannelID}-repoDataFile-${architecture}.json`
}
try {
await this._cachedDownload(repoFile.cacheKey, repoFile.url,
repoFile.cacheDuration, repoFile.fileLocation)
} catch (error) {
return null
}
let fileText = fs.readFileSync(repoFile.fileLocation)
return JSON.parse(fileText)
return await this._fetchCachedJSONFile(`${condaChannelID}-repoDataFile-${architecture}`, `${condaChannelUrl}/${architecture}/repodata.json`, this.CACHE_DURATION, `${this.packageMapFolder}/${condaChannelID}-repoDataFile-${architecture}.json`)
}
}

Expand Down
34 changes: 4 additions & 30 deletions providers/process/condaSrcExtract.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
const AbstractClearlyDefinedProcessor = require('./abstractClearlyDefinedProcessor')
const sourceDiscovery = require('../../lib/sourceDiscovery')
const { merge } = require('lodash')
const SourceSpec = require('../../lib/sourceSpec')

class CondaSrcExtract extends AbstractClearlyDefinedProcessor {
constructor(options, sourceFinder) {
Expand All @@ -15,38 +14,13 @@ class CondaSrcExtract extends AbstractClearlyDefinedProcessor {

canHandle(request) {
const spec = this.toSpec(request)
return request.type === 'conda' && spec && spec.type === 'condasrc'
return request.type === 'clearlydefined' && spec && spec.type === 'condasrc'
}

async handle(request) {
if (this.isProcessing(request)) {
await super.handle(request)
const { releaseDate, registryData, declaredLicenses } = request.document
request.document = merge(this.clone(request.document), { releaseDate, registryData, declaredLicenses })
let sourceCandidates = [
registryData.channelData.source_url,
registryData.channelData.source_git_url,
registryData.channelData.home,
registryData.channelData.dev_url,
registryData.channelData.doc_url,
registryData.channelData.doc_source_url].filter(e => e)
let sourceInfo = undefined
const githubSource = await this.sourceFinder(
registryData.channelData.version, sourceCandidates, {
githubToken: this.options.githubToken,
logger: this.logger
})
if (githubSource) {
sourceInfo = githubSource
request.document.sourceInfo = sourceInfo
}
}
this.addLocalToolTasks(request)
if (request.document.sourceInfo) {
const sourceSpec = SourceSpec.fromObject(request.document.sourceInfo)
this.linkAndQueue(request, 'source', sourceSpec.toEntitySpec())
}
return request
await super.handle(request)
const { releaseDate, registryData, declaredLicenses } = request.document
request.document = merge(this.clone(request.document), { releaseDate, registryData, declaredLicenses })
}
}

Expand Down
2 changes: 1 addition & 1 deletion providers/process/package.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

const AbstractProcessor = require('./abstractProcessor')

const supportedTypes = ['npm', 'conda', 'condasrc', 'crate', 'maven', 'nuget', 'gem', 'go', 'pod', 'pypi', 'composer', 'deb']
const supportedTypes = ['npm', 'conda', 'crate', 'maven', 'nuget', 'gem', 'go', 'pod', 'pypi', 'composer', 'deb']

class PackageProcessor extends AbstractProcessor {
shouldFetch() {
Expand Down
2 changes: 1 addition & 1 deletion providers/process/source.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: MIT

const AbstractProcessor = require('./abstractProcessor')
const supportedTypes = ['git', 'sourcearchive', 'debsrc']
const supportedTypes = ['git', 'sourcearchive', 'debsrc', 'condasrc']

class SourceProcessor extends AbstractProcessor {
shouldFetch() {
Expand Down
2 changes: 1 addition & 1 deletion providers/process/top.js
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ class TopProcessor extends AbstractProcessor {
`Conda top - coordinates: ${packagesCoordinates.length}, start: ${start}, end: ${end}, sliced: ${slicedCoordinates.length}`
)

await request.queueRequests(slicedCoordinates.map(coord => new Request('package', coord)))
await request.queueRequests(slicedCoordinates.map(coord => new Request(spec.type == 'conda' ? 'package' : 'source', coord)))
return request.markNoSave()
}

Expand Down
Binary file added test/fixtures/conda/21cmFAST-3.3.1.tar.gz
Binary file not shown.
38 changes: 38 additions & 0 deletions test/fixtures/conda/channeldata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"channeldata_version": 1,
"packages": {
"21cmfast": {
"activate.d": false,
"binary_prefix": true,
"deactivate.d": false,
"description": "21cmFAST provides a simple and fast simulation package for the cosmological 21cm signal as either coeval cubes or full lightcones.",
"dev_url": "https://github.com/21cmFAST/21cmFAST",
"doc_url": "https://21cmFAST.readthedocs.io/",
"home": "https://github.com/21cmFAST/21cmFAST",
"license": "MIT",
"post_link": false,
"pre_link": false,
"pre_unlink": false,
"run_exports": {},
"source_url": "https://pypi.io/packages/source/2/21cmFAST/21cmFAST-3.3.1.tar.gz",
"subdirs": ["linux-64", "osx-64"],
"summary": "A semi-numerical cosmological simulation code for the 21cm signal",
"text_prefix": true,
"timestamp": 1651260314,
"version": "3.3.1"
},
"21cmfast_invalid": {
"activate.d": false,
"binary_prefix": true,
"deactivate.d": false,
"post_link": false,
"pre_link": false,
"pre_unlink": false,
"run_exports": {},
"subdirs": ["linux-64", "osx-64"],
"text_prefix": true,
"timestamp": 1651260314,
"version": "3.3.1"
}
}
}
Loading

0 comments on commit 0408e96

Please sign in to comment.