Skip to content

Commit

Permalink
Score de disponibilité : ignore documentation et plus de pénalité pou…
Browse files Browse the repository at this point in the history
…r score à 0 (#3636)
  • Loading branch information
AntoineAugusti authored Dec 7, 2023
1 parent affbcb0 commit b7b0cf2
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 15 deletions.
17 changes: 3 additions & 14 deletions apps/transport/lib/jobs/dataset_quality_score.ex
Original file line number Diff line number Diff line change
Expand Up @@ -268,34 +268,23 @@ defmodule Transport.Jobs.DatasetAvailabilityScore do
Saves and computes an availability score for a dataset.
To compute this score:
- get the dataset's current resources
- get the dataset's current resources, excluding documentation resources
- for each resource, give it a score based on its availability over the last 24 hours
- we compute an average of those scores to get a score at the dataset level
- that score is averaged with the dataset's last computed score, using exponential smoothing
(see the function `exp_smoothing/3`). This allows a score to reflect not only the current
dataset situation but also past situations.
If any resource has an availability score of 0 (under 95% of availability over the last 24 hours),
the availability score of the dataset will be 0.
The rationale is that the entire dataset may be unusable if a single resource cannot be fetched.
"""
import Ecto.Query
import Transport.Jobs.DatasetQualityScore

@doc """
Computes the current availability of a dataset from its resources.

Documentation resources are excluded before any score is computed. Each remaining
resource is passed to `resource_availability/1`, and the dataset score is the
`average/1` of the resulting `availability` values. A resource with an availability
of 0 therefore only lowers the average; it does not force the dataset score to 0.

Returns a map with:
- `:score` - the averaged availability (the `@spec` allows `nil`; presumably returned
  by `average/1` when there is no resource left to score - TODO confirm)
- `:details` - a map whose `:resources` key holds the per-resource availability maps
"""
@spec current_dataset_availability(integer()) :: %{score: float | nil, details: map()}
def current_dataset_availability(dataset_id) do
  # Resources are fetched once; the previous body fetched them twice and discarded
  # the first result.
  current_dataset_infos =
    dataset_id
    |> dataset_resources()
    |> Enum.reject(&DB.Resource.is_documentation?/1)
    |> Enum.map(&resource_availability/1)

  scores = Enum.map(current_dataset_infos, fn %{availability: availability} -> availability end)

  %{score: average(scores), details: %{resources: current_dataset_infos}}
end

@spec resource_availability(DB.Resource.t()) :: %{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,23 @@ defmodule Transport.Test.Transport.Jobs.DatasetQualityScoreTest do
} == current_dataset_availability(dataset.id)
end

test "excludes documentation resources to compute the score" do
  dataset = insert(:dataset, is_active: true)

  # One regular resource and one documentation resource on the same dataset:
  # only the regular one is expected to show up in the details and in the score.
  regular_resource = insert(:resource, dataset: dataset, is_community_resource: false)

  documentation_resource =
    insert(:resource, dataset: dataset, is_community_resource: false, type: "documentation")

  # Sanity check: the second resource really is seen as documentation.
  assert DB.Resource.is_documentation?(documentation_resource)

  expected_availability = %{
    details: %{
      resources: [
        %{availability: 1.0, raw_measure: nil, resource_id: regular_resource.id}
      ]
    },
    score: 1.0
  }

  assert expected_availability == current_dataset_availability(dataset.id)
end

test "2 resources, one down for a long time" do
dataset = insert(:dataset, is_active: true)
r1 = insert(:resource, dataset: dataset, is_community_resource: false)
Expand All @@ -226,7 +243,7 @@ defmodule Transport.Test.Transport.Jobs.DatasetQualityScoreTest do
%{availability: 0.0, raw_measure: 0, resource_id: r2.id}
]
},
score: 0.0
score: 0.5
} == current_dataset_availability(dataset.id)
end

Expand Down

0 comments on commit b7b0cf2

Please sign in to comment.