Skip to content

Commit

Permalink
analyzer: Change PackageCurationProvider to support bulk requests
Browse files Browse the repository at this point in the history
Requesting curations for multiple packages / ids at once can be much
more performant than single request, depending on the actual provider
implementation.

Partly resolves #3905.

Signed-off-by: Sebastian Schuberth <sebastian.schuberth@bosch.io>
  • Loading branch information
sschuberth committed Jan 4, 2022
1 parent 2836f18 commit ebfc988
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 67 deletions.
23 changes: 10 additions & 13 deletions analyzer/src/main/kotlin/AnalyzerResultBuilder.kt
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,20 @@ class AnalyzerResultBuilder(private val curationProvider: PackageCurationProvide
* independently of a [ProjectAnalyzerResult].
*/
fun addPackages(packageSet: Set<Package>): AnalyzerResultBuilder {
val (curatedPackages, duration) = measureTimedValue {
packageSet.map { pkg ->
val curations = curationProvider.getCurationsFor(pkg.id)
curations.fold(pkg.toCuratedPackage()) { cur, packageCuration ->
log.debug {
"Applying curation '$packageCuration' to package '${pkg.id.toCoordinates()}'."
}

packageCuration.apply(cur)
val (curations, duration) = measureTimedValue { curationProvider.getCurationsFor(packageSet.map { it.id }) }

log.perf { "Getting package curations took $duration." }

packages += packageSet.map { pkg ->
curations[pkg.id].orEmpty().fold(pkg.toCuratedPackage()) { cur, packageCuration ->
log.debug {
"Applying curation '$packageCuration' to package '${pkg.id.toCoordinates()}'."
}

packageCuration.apply(cur)
}
}

packages += curatedPackages

log.perf { "Getting package curations took $duration." }

return this
}

Expand Down
6 changes: 3 additions & 3 deletions analyzer/src/main/kotlin/PackageCurationProvider.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ fun interface PackageCurationProvider {
* A provider that does not provide any curations.
*/
@JvmField
val EMPTY = PackageCurationProvider { emptyList() }
val EMPTY = PackageCurationProvider { emptyMap() }
}

/**
* Get all available [PackageCuration]s for the provided [pkgId].
* Get all available [PackageCuration]s for the provided [pkgIds].
*/
fun getCurationsFor(pkgId: Identifier): List<PackageCuration>
fun getCurationsFor(pkgIds: Collection<Identifier>): Map<Identifier, List<PackageCuration>>
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import okhttp3.OkHttpClient

import org.ossreviewtoolkit.analyzer.PackageCurationProvider
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.Coordinates
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.Server
import org.ossreviewtoolkit.clients.clearlydefined.ClearlyDefinedService.SourceLocation
import org.ossreviewtoolkit.clients.clearlydefined.ComponentType
Expand Down Expand Up @@ -92,14 +93,18 @@ class ClearlyDefinedPackageCurationProvider(

private val service by lazy { ClearlyDefinedService.create(serverUrl, client ?: OkHttpClientHelper.buildClient()) }

override fun getCurationsFor(pkgId: Identifier): List<PackageCuration> {
val (type, provider) = pkgId.toClearlyDefinedTypeAndProvider() ?: return emptyList()
val namespace = pkgId.namespace.takeUnless { it.isEmpty() } ?: "-"
override fun getCurationsFor(pkgIds: Collection<Identifier>): Map<Identifier, List<PackageCuration>> {
val coordinatesToIds = pkgIds.mapNotNull { pkgId ->
pkgId.toClearlyDefinedTypeAndProvider()?.let { (type, provider) ->
val namespace = pkgId.namespace.takeUnless { it.isEmpty() } ?: "-"
Coordinates(type, provider, namespace, pkgId.name, pkgId.version) to pkgId
}
}.toMap()

val curation = runCatching {
val contributedCurations = runCatching {
// TODO: Maybe make PackageCurationProvider.getCurationsFor() a suspend function; then all derived
// classes could deal with coroutines more easily.
runBlocking(Dispatchers.IO) { service.getCuration(type, provider, namespace, pkgId.name, pkgId.version) }
runBlocking(Dispatchers.IO) { service.getCurations(coordinatesToIds.keys) }
}.onFailure { e ->
when (e) {
is HttpException -> {
Expand All @@ -110,47 +115,51 @@ class ClearlyDefinedPackageCurationProvider(

log.warn {
val message = e.response()?.errorBody()?.string() ?: e.collectMessagesAsString()
"Getting curations for '${pkgId.toCoordinates()}' failed with code ${e.code()}: $message"
"Getting curations failed with code ${e.code()}: $message"
}
}
}

is JsonMappingException -> {
e.showStackTrace()

log.warn { "Deserializing the ClearlyDefined curation for '${pkgId.toCoordinates()}' failed." }
log.warn { "Deserializing the curations failed." }
}

else -> {
e.showStackTrace()

log.warn {
"Querying ClearlyDefined curation for '${pkgId.toCoordinates()}' failed: " +
e.collectMessagesAsString()
}
log.warn { "Querying curations failed: ${e.collectMessagesAsString()}" }
}
}
}.getOrNull() ?: return emptyList()
}.getOrNull() ?: return emptyMap()

val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense ->
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in particular none
// that contain "OTHER" as a license, also see https://github.com/clearlydefined/curated-data/issues/7836.
runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull()
}
val curations = mutableMapOf<Identifier, MutableList<PackageCuration>>()

val sourceLocation = curation.described?.sourceLocation.toArtifactOrVcs()
contributedCurations.forEach { (_, contributions) ->
contributions.curations.forEach inner@{ (coordinates, curation) ->
val pkgId = coordinatesToIds[coordinates] ?: return@inner

val pkgCuration = PackageCuration(
id = pkgId,
data = PackageCurationData(
concludedLicense = declaredLicenseParsed,
homepageUrl = curation.described?.projectWebsite?.toString(),
sourceArtifact = sourceLocation as? RemoteArtifact,
vcs = sourceLocation as? VcsInfoCurationData,
comment = "Provided by ClearlyDefined."
)
)
val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense ->
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in
// particular none that contain "OTHER" as a license, also see
// https://github.com/clearlydefined/curated-data/issues/7836.
runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull()
}

val sourceLocation = curation.described?.sourceLocation.toArtifactOrVcs()

curations.getOrPut(pkgId) { mutableListOf() } += PackageCuration(
id = pkgId,
data = PackageCurationData(
concludedLicense = declaredLicenseParsed,
homepageUrl = curation.described?.projectWebsite?.toString(),
sourceArtifact = sourceLocation as? RemoteArtifact,
vcs = sourceLocation as? VcsInfoCurationData,
comment = "Provided by ClearlyDefined."
)
)
}
}

return listOf(pkgCuration)
return curations
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,23 @@ package org.ossreviewtoolkit.analyzer.curation

import org.ossreviewtoolkit.analyzer.PackageCurationProvider
import org.ossreviewtoolkit.model.Identifier
import org.ossreviewtoolkit.model.PackageCuration

/**
* A curation provider that does not provide any curations on its own, but searches the given list of [providers] for
* the first matching curation, and falls back to the next provider in the list if there is no match.
*/
class FallbackPackageCurationProvider(private val providers: List<PackageCurationProvider>) : PackageCurationProvider {
override fun getCurationsFor(pkgId: Identifier) =
providers.asSequence().map { it.getCurationsFor(pkgId) }.find { it.isNotEmpty() }.orEmpty()
override fun getCurationsFor(pkgIds: Collection<Identifier>): Map<Identifier, List<PackageCuration>> {
val curations = mutableMapOf<Identifier, List<PackageCuration>>()
val remainingPkgIds = pkgIds.toMutableSet()

for (provider in providers) {
if (remainingPkgIds.isEmpty()) break
curations += provider.getCurationsFor(remainingPkgIds)
remainingPkgIds -= curations.keys
}

return curations
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,8 @@ import org.ossreviewtoolkit.model.PackageCuration
open class SimplePackageCurationProvider(
val packageCurations: Collection<PackageCuration>
) : PackageCurationProvider {
override fun getCurationsFor(pkgId: Identifier) = packageCurations.filter { it.isApplicable(pkgId) }
override fun getCurationsFor(pkgIds: Collection<Identifier>) =
pkgIds.mapNotNull { pkgId ->
packageCurations.filter { it.isApplicable(pkgId) }.takeUnless { it.isEmpty() }?.let { pkgId to it }
}.toMap()
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,12 @@ class Sw360PackageCurationProvider(sw360Configuration: Sw360StorageConfiguration
jsonMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
)

override fun getCurationsFor(pkgId: Identifier): List<PackageCuration> {
override fun getCurationsFor(pkgIds: Collection<Identifier>) =
pkgIds.mapNotNull { pkgId ->
getCurationsFor(pkgId).takeUnless { it.isEmpty() }?.let { pkgId to it }
}.toMap()

private fun getCurationsFor(pkgId: Identifier): List<PackageCuration> {
val name = listOfNotNull(pkgId.namespace, pkgId.name).joinToString("/")
val sw360ReleaseClient = sw360Connection.releaseAdapter

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import com.github.tomakehurst.wiremock.client.WireMock.get
import com.github.tomakehurst.wiremock.core.WireMockConfiguration

import io.kotest.core.spec.style.WordSpec
import io.kotest.matchers.collections.beEmpty
import io.kotest.matchers.maps.beEmpty
import io.kotest.matchers.should

import java.time.Duration
Expand Down Expand Up @@ -68,8 +68,9 @@ class ClearlyDefinedPackageCurationProviderMockTest : WordSpec({
}

val provider = ClearlyDefinedPackageCurationProvider("http://localhost:${server.port()}", client)
val ids = listOf(Identifier("Maven:some-ns:some-component:1.2.3"))

provider.getCurationsFor(Identifier("Maven:some-ns:some-component:1.2.3")) should beEmpty()
provider.getCurationsFor(ids) should beEmpty()
}
}
})
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
package org.ossreviewtoolkit.analyzer.curation

import io.kotest.core.spec.style.WordSpec
import io.kotest.matchers.collections.beEmpty
import io.kotest.matchers.collections.haveSize
import io.kotest.matchers.maps.beEmpty
import io.kotest.matchers.maps.haveSize
import io.kotest.matchers.should
import io.kotest.matchers.shouldBe

Expand All @@ -35,18 +35,18 @@ class ClearlyDefinedPackageCurationProviderTest : WordSpec({
val provider = ClearlyDefinedPackageCurationProvider()

val identifier = Identifier("Maven:javax.servlet:javax.servlet-api:3.1.0")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should haveSize(1)
curations.first().data.concludedLicense shouldBe
curations.values.flatten().first().data.concludedLicense shouldBe
"CDDL-1.0 OR GPL-2.0-only WITH Classpath-exception-2.0".toSpdx()
}

"return no curation for a non-existing dummy NPM package" {
val provider = ClearlyDefinedPackageCurationProvider()

val identifier = Identifier("NPM:@scope:name:1.2.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should beEmpty()
}
Expand All @@ -57,17 +57,20 @@ class ClearlyDefinedPackageCurationProviderTest : WordSpec({
val provider = ClearlyDefinedPackageCurationProvider(Server.DEVELOPMENT)

val identifier = Identifier("NPM:@nestjs:platform-express:6.2.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should haveSize(1)
curations.first().data.concludedLicense shouldBe "Apache-1.0".toSpdx()

// TODO: Find out why the server returns "Apache-1.0" here for a GET (single) request, but "Apache-2.0" for
// a POST (bulk) request.
curations.values.flatten().first().data.concludedLicense shouldBe "Apache-2.0".toSpdx()
}

"return no curation for a non-existing dummy Maven package" {
val provider = ClearlyDefinedPackageCurationProvider()

val identifier = Identifier("Maven:group:name:1.2.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier))

curations should beEmpty()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class FilePackageCurationProviderTest : StringSpec() {
)

idsWithExistingCurations.forEach {
val curations = provider.getCurationsFor(it)
val curations = provider.getCurationsFor(listOf(it)).values.flatten()

curations should haveSize(1)
}
Expand All @@ -68,7 +68,7 @@ class FilePackageCurationProviderTest : StringSpec() {
)

idsWithExistingCurations.forEach {
val curations = provider.getCurationsFor(it)
val curations = provider.getCurationsFor(listOf(it)).values.flatten()

curations should haveSize(1)
}
Expand All @@ -78,7 +78,7 @@ class FilePackageCurationProviderTest : StringSpec() {
val provider = FilePackageCurationProvider(curationsFile)

val identifier = Identifier("maven", "org.hamcrest", "hamcrest-core", "1.3")
val curations = provider.getCurationsFor(identifier)
val curations = provider.getCurationsFor(listOf(identifier)).values.flatten()

curations should haveSize(4)
curations.forEach {
Expand All @@ -96,9 +96,9 @@ class FilePackageCurationProviderTest : StringSpec() {
val idMaxVersion = Identifier("npm", "", "ramda", "0.25.0")
val idOutVersion = Identifier("npm", "", "ramda", "0.26.0")

val curationsMinVersion = provider.getCurationsFor(idMinVersion)
val curationsMaxVersion = provider.getCurationsFor(idMaxVersion)
val curationsOutVersion = provider.getCurationsFor(idOutVersion)
val curationsMinVersion = provider.getCurationsFor(listOf(idMinVersion)).values.flatten()
val curationsMaxVersion = provider.getCurationsFor(listOf(idMaxVersion)).values.flatten()
val curationsOutVersion = provider.getCurationsFor(listOf(idOutVersion)).values.flatten()

curationsMinVersion should haveSize(1)
(provider.packageCurations - curationsMinVersion).forEach {
Expand Down

0 comments on commit ebfc988

Please sign in to comment.