Skip to content

Commit

Permalink
Merge pull request #1232 from qtomlinson/qt/optimize_recompute
Browse files Browse the repository at this point in the history
Optimize recomputing definition
  • Loading branch information
qtomlinson authored Nov 21, 2024
2 parents f8e77ad + c541259 commit f79a63c
Show file tree
Hide file tree
Showing 23 changed files with 629 additions and 142 deletions.
2 changes: 1 addition & 1 deletion business/definitionService.js
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ class DefinitionService {
*/
async compute(coordinates, curationSpec) {
this.logger.debug('4:compute:blob:start', { ts: new Date().toISOString(), coordinates: coordinates.toString() })
const raw = await this.harvestStore.getAll(coordinates)
const raw = await this.harvestStore.getAllLatest(coordinates)
this.logger.debug('4:compute:blob:end', { ts: new Date().toISOString(), coordinates: coordinates.toString() })
coordinates = this._getCasedCoordinates(raw, coordinates)
this.logger.debug('5:compute:summarize:start', {
Expand Down
1 change: 1 addition & 0 deletions lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ function getLatestVersion(versions) {
const normalizedCurrent = _normalizeVersion(current)
if (!normalizedCurrent || semver.prerelease(normalizedCurrent) !== null) return max
const normalizedMax = _normalizeVersion(max)
if (!normalizedMax) return normalizedCurrent
return semver.gt(normalizedCurrent, normalizedMax) ? current : max
}, versions[0])
}
Expand Down
2 changes: 1 addition & 1 deletion providers/stores/abstractAzblobStore.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class AbstractAzBlobStore {
constructor(options) {
this.options = options
this.containerName = options.containerName
this.logger = logger()
this.logger = this.options.logger || logger()
}

async initialize() {
Expand Down
24 changes: 23 additions & 1 deletion providers/stores/abstractFileStore.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ const recursive = require('recursive-readdir')
const { promisify } = require('util')
const ResultCoordinates = require('../../lib/resultCoordinates')
const schema = require('../../schemas/definition-1.0')
const { getLatestVersion } = require('../../lib/utils')
const logger = require('../logging/logger')

class AbstractFileStore {
constructor(options) {
this.options = options
this.options = options || {}
this.logger = this.options.logger || logger()
}

async initialize() {}
Expand Down Expand Up @@ -146,6 +149,25 @@ class AbstractFileStore {
.join('/')
.toLowerCase()
}

static getLatestToolPaths(paths, toResultCoordinates = path => this.toResultCoordinatesFromStoragePath(path)) {
const entries = paths
.map(path => {
const { tool, toolVersion } = toResultCoordinates(path)
return { tool, toolVersion, path }
})
.reduce((latest, { tool, toolVersion, path }) => {
if (!tool || !toolVersion) return latest
latest[tool] = latest[tool] || {}
//if the version is greater than the current version, replace it
if (!latest[tool].toolVersion || getLatestVersion([toolVersion, latest[tool].toolVersion]) === toolVersion) {
latest[tool] = { toolVersion, path }
}
return latest
}, {})
const latestPaths = Object.values(entries).map(entry => entry.path)
return new Set(latestPaths)
}
}

module.exports = AbstractFileStore
81 changes: 58 additions & 23 deletions providers/stores/azblobHarvestStore.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,34 +46,40 @@ class AzHarvestBlobStore extends AbstractAzBlobStore {
* @param {EntityCoordinates} coordinates - The component revision to report on
* @returns An object with a property for each tool and tool version
*/
getAll(coordinates) {
const name = this._toStoragePathFromCoordinates(coordinates)
async getAll(coordinates) {
// Note that here we are assuming the number of blobs will be small-ish (<10) and
// a) all fit in memory reasonably, and
// b) fit in one list call (i.e., <5000)
const list = new Promise((resolve, reject) =>
const allFilesList = await this._getListOfAllFiles(coordinates)
return await this._getContent(allFilesList)
}

_getListOfAllFiles(coordinates) {
const name = this._toStoragePathFromCoordinates(coordinates)
return new Promise((resolve, reject) =>
this.blobService.listBlobsSegmentedWithPrefix(this.containerName, name, null, resultOrError(resolve, reject))
).then(files =>
files.entries.filter(file => {
return (
file.name.length === name.length || // either an exact match, or
(file.name.length > name.length && // a longer string
(file.name[name.length] === '/' || // where the next character starts extra tool indications
file.name.substr(name.length) === '.json'))
)
})
)
}

_getContent(files) {
const contents = Promise.all(
files.map(file => {
return new Promise((resolve, reject) =>
this.blobService.getBlobToText(this.containerName, file.name, resultOrError(resolve, reject))
).then(result => {
return { name: file.name, content: JSON.parse(result) }
})
})
)
const contents = list.then(files => {
return Promise.all(
files.entries
.filter(file => {
return (
file.name.length === name.length || // either an exact match, or
(file.name.length > name.length && // a longer string
(file.name[name.length] === '/' || // where the next character starts extra tool indications
file.name.substr(name.length) === '.json')) // or is the end, identifying a json file extension
)
})
.map(file => {
return new Promise((resolve, reject) =>
this.blobService.getBlobToText(this.containerName, file.name, resultOrError(resolve, reject))
).then(result => {
return { name: file.name, content: JSON.parse(result) }
})
})
)
})
return contents.then(entries => {
return entries.reduce((result, entry) => {
const { tool, toolVersion } = this._toResultCoordinatesFromStoragePath(entry.name)
Expand All @@ -85,6 +91,35 @@ class AzHarvestBlobStore extends AbstractAzBlobStore {
}, {})
})
}

/**
* Get the latest version of each tool output for the given coordinates. The coordinates must be all the way down
* to a revision.
* @param {EntityCoordinates} coordinates - The component revision to report on
* @returns {Promise} A promise that resolves to an object with a property for each tool and tool version
*
*/
async getAllLatest(coordinates) {
const allFilesList = await this._getListOfAllFiles(coordinates)
const latestFilesList = this._getListOfLatestFiles(allFilesList)
return await this._getContent(latestFilesList)
}

_getListOfLatestFiles(allFiles) {
let latestFiles = []
const names = allFiles.map(file => file.name)
try {
const latest = this._getLatestToolPaths(names)
latestFiles = allFiles.filter(file => latest.has(file.name))
} catch (error) {
this.logger.error('Error getting latest files', error)
}
return latestFiles.length === 0 ? allFiles : latestFiles
}

_getLatestToolPaths(paths) {
return AbstractFileStore.getLatestToolPaths(paths, path => this._toResultCoordinatesFromStoragePath(path))
}
}

module.exports = options => new AzHarvestBlobStore(options)
50 changes: 46 additions & 4 deletions providers/stores/fileHarvestStore.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,24 @@ class FileHarvestStore extends AbstractFileStore {
*/
async getAll(coordinates) {
// TODO validate/enforce that the coordinates are down to the component revision
const root = this._toStoragePathFromCoordinates(coordinates)
// Note that here we are assuming the number of blobs will be small-ish (<10) and
// a) all fit in memory reasonably, and
// b) fit in one list call (i.e., <5000)
let files = null
const allFilesList = await this._getListOfAllFiles(coordinates)
return await this._getContent(allFilesList)
}

async _getListOfAllFiles(coordinates) {
const root = this._toStoragePathFromCoordinates(coordinates)
try {
files = await recursive(root, ['.DS_Store'])
return await recursive(root, ['.DS_Store'])
} catch (error) {
if (error.code === 'ENOENT') return {}
if (error.code === 'ENOENT') return []
throw error
}
}

async _getContent(files) {
const contents = await Promise.all(
files.map(file => {
return new Promise((resolve, reject) =>
Expand All @@ -74,6 +81,41 @@ class FileHarvestStore extends AbstractFileStore {
return result
}, {})
}

/**
* Get the latest version of each tool output for the given coordinates. The coordinates must be all the way down
* to a revision.
* @param {EntityCoordinates} coordinates - The component revision to report on
* @returns {Promise} A promise that resolves to an object with a property for each tool and tool version
*
*/
async getAllLatest(coordinates) {
const allFilesList = await this._getListOfAllFiles(coordinates)
const latestFilesList = this._getListOfLatestFiles(allFilesList)
return await this._getContent(latestFilesList)
}

_getListOfLatestFiles(allFiles) {
let latestFiles = []
try {
const latest = this._getLatestToolVersions(allFiles)
latestFiles = allFiles.filter(file => latest.has(file))
} catch (error) {
this.logger.error('Error getting latest files', error)
}
if (latestFiles.length === 0) {
this.logger.debug('No latest files found, returning all files')
return allFiles
}
if (latestFiles.length !== allFiles.length) {
this.logger.debug(`Using latest: \n${latestFiles}`)
}
return latestFiles
}

_getLatestToolVersions(paths) {
return AbstractFileStore.getLatestToolPaths(paths, path => this._toResultCoordinatesFromStoragePath(path))
}
}

module.exports = options => new FileHarvestStore(options)
67 changes: 66 additions & 1 deletion test/business/definitionServiceTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ const deepEqualInAnyOrder = require('deep-equal-in-any-order')
const chai = require('chai')
chai.use(deepEqualInAnyOrder)
const expect = chai.expect
const FileHarvestStore = require('../../providers/stores/fileHarvestStore')
const SummaryService = require('../../business/summarizer')
const AggregatorService = require('../../business/aggregator')

describe('Definition Service', () => {
it('invalidates single coordinate', async () => {
Expand Down Expand Up @@ -310,6 +313,68 @@ describe('Definition Service Facet management', () => {
})
})

// Checks that a definition computed from only the latest harvest data matches one computed from all of it.
describe('Integration test', () => {
  let fileHarvestStore
  beforeEach(() => {
    fileHarvestStore = createFileHarvestStore()
  })

  it('computes the same definition with latest harvest data', async () => {
    const coordinates = EntityCoordinates.fromString('npm/npmjs/-/debug/3.1.0')

    // Baseline: everything in the store for these coordinates, minus one known-bad entry
    const allHarvestData = await fileHarvestStore.getAll(coordinates)
    delete allHarvestData['scancode']['2.9.0+b1'] //remove invalid scancode version
    const baselineDefinition = await setupDefinitionService(allHarvestData).compute(coordinates)

    // Comparison: only the latest version of each tool
    const latestHarvestData = await fileHarvestStore.getAllLatest(coordinates)
    const comparisonDefinition = await setupDefinitionService(latestHarvestData).compute(coordinates)

    // The updated timestamp is not deterministic, so it is aligned before the deep comparison.
    // Nothing is asserted about the two timestamps differing: two quick computes may share one.
    comparisonDefinition._meta.updated = baselineDefinition._meta.updated
    expect(comparisonDefinition).to.deep.equal(baselineDefinition)
  })
})

/**
 * Build a FileHarvestStore over the test fixture directory with a logger whose methods do nothing.
 * @returns the store produced by FileHarvestStore for those options
 */
function createFileHarvestStore() {
  const silentLogger = {
    error: () => {},
    debug: () => {}
  }
  return FileHarvestStore({ location: 'test/fixtures/store', logger: silentLogger })
}

/**
 * Build a definition service whose harvest store always answers with the supplied raw harvest data.
 * The summarizer and aggregator come from SummaryService and AggregatorService; the curator is a
 * pass-through stand-in that hands the definition back from apply.
 * @param rawHarvestData - value that harvestStore.getAllLatest resolves to
 * @returns the service produced by setupWithDelegates
 */
function setupDefinitionService(rawHarvestData) {
  const harvestStore = {
    getAllLatest() {
      return Promise.resolve(rawHarvestData)
    }
  }
  const summary = SummaryService({})

  const precedence = [['clearlydefined', 'reuse', 'licensee', 'scancode', 'fossology', 'cdsource']]
  const aggregator = AggregatorService({ precedence })
  aggregator.logger = { info: sinon.stub() }

  const curator = {
    get() {
      return Promise.resolve()
    },
    apply(_coordinates, _curationSpec, definition) {
      return Promise.resolve(definition)
    },
    autoCurate() {}
  }
  return setupWithDelegates(curator, harvestStore, summary, aggregator)
}

/**
 * Create a DefinitionService from the given delegates, filling in sinon stubs for the definition store,
 * search and cache, and replacing the service's logger and _harvest with test doubles.
 * @param curator - curation delegate handed to DefinitionService
 * @param harvestStore - harvest store delegate handed to DefinitionService
 * @param summary - summarizer delegate handed to DefinitionService
 * @param aggregator - aggregator delegate handed to DefinitionService
 * @returns the configured service
 */
function setupWithDelegates(curator, harvestStore, summary, aggregator) {
  // One fresh stub per listed method name
  const stubbed = methodNames => Object.fromEntries(methodNames.map(method => [method, sinon.stub()]))

  const store = stubbed(['delete', 'get', 'store'])
  const search = stubbed(['delete', 'store'])
  const cache = stubbed(['delete', 'get', 'set'])
  const harvestService = { harvest: () => sinon.stub() }

  const service = DefinitionService(harvestStore, harvestService, summary, aggregator, curator, store, search, cache)
  service.logger = { info: sinon.stub(), debug: () => {} }
  service._harvest = sinon.stub()
  return service
}

function validate(definition) {
// Tack on a dummy coordinates to keep the schema happy. Tool summarizations do not have to include coordinates
definition.coordinates = { type: 'npm', provider: 'npmjs', namespace: null, name: 'foo', revision: '1.0' }
Expand Down Expand Up @@ -342,7 +407,7 @@ function setup(definition, coordinateSpec, curation) {
return
}
}
const harvestStore = { getAll: () => Promise.resolve(null) }
const harvestStore = { getAllLatest: () => Promise.resolve(null) }
const harvestService = { harvest: () => sinon.stub() }
const summary = { summarizeAll: () => Promise.resolve(null) }
const aggregator = { process: () => Promise.resolve(definition) }
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_metadata":{"type":"fossology","url":"cd:/npm/npmjs/-/debug/3.1.0/tool/scancode","fetchedAt":"2018-09-26T08:09:09.693Z","links":{"self":{"href":"urn:npm:npmjs:-:debug:revision:3.1.0:tool:fossology:3.3.0","type":"resource"},"siblings":{"href":"urn:npm:npmjs:-:debug:revision:3.1.0:tool:scancode","type":"collection"}},"version":"3.3.0","processedAt":"2018-09-26T08:09:10.447Z"},"nomos":{"version":"3.3.0","parameters":"-ld /tmp/cd-cONOHO","output":{"contentType":"text/plain","content":"File package/karma.conf.js contains license(s) No_license_found\nFile package/Makefile contains license(s) No_license_found\nFile package/src/debug.js contains license(s) No_license_found\nFile package/src/browser.js contains license(s) No_license_found\nFile package/README.md contains license(s) MIT\nFile package/.coveralls.yml contains license(s) No_license_found\nFile package/LICENSE contains license(s) MIT\nFile package/.eslintrc contains license(s) No_license_found\nFile package/node.js contains license(s) No_license_found\nFile package/src/node.js contains license(s) No_license_found\nFile package/src/index.js contains license(s) No_license_found\nFile package/.travis.yml contains license(s) No_license_found\nFile package/.npmignore contains license(s) No_license_found\nFile package/CHANGELOG.md contains license(s) No_license_found\nFile package/package.json contains license(s) MIT\n"}}}

Large diffs are not rendered by default.

Loading

0 comments on commit f79a63c

Please sign in to comment.