Skip to content

Commit

Permalink
Reworking filter provider (#5148)
Browse files Browse the repository at this point in the history
There are a few things here:
* The filters should work on `DataIdentifierBorrowed` instead of
`DataRequest`; metadata should not be relevant for filtering
* This implies a rename from `RequestFilterDataProvider` to
`FilterDataProvider`
* previously the provider implementations returned
`DataErrorKind::Filtered` for filtered ids, however this does not work
with fallback. They should return `DataErrorKind::IdentiferNotFound`; to
a caller (like the fallback adapter) it should be opaque whether the
pipeline contains a filter or not
* `filter_by_langid_allowlist_strict` is marked as "will be replaced
with a smarter algorithm for locale filtering". This smarter algorithm
now exists, so this should be deleted (fixes #834)
* The `Filterable` blanket trait adds `.filterable` to every type in
this crate's rustdoc, and to every type in dev docs. I replaced it with
a normal constructor.
  • Loading branch information
robertbastian authored Jun 28, 2024
1 parent b021268 commit dc29c76
Show file tree
Hide file tree
Showing 12 changed files with 105 additions and 298 deletions.
11 changes: 5 additions & 6 deletions ffi/capi/bindings/c/ICU4XDataError.d.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 10 additions & 12 deletions ffi/capi/bindings/cpp/ICU4XDataError.d.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion ffi/capi/bindings/cpp/ICU4XDataError.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions ffi/capi/bindings/dart/DataError.g.dart

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions ffi/capi/bindings/js/ICU4XDataError.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 10 additions & 13 deletions ffi/capi/bindings/js/ICU4XDataError.mjs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 5 additions & 8 deletions ffi/capi/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@ pub mod ffi {
MarkerNotFound = 0x01,
IdentifierNotFound = 0x02,
InvalidRequest = 0x03,
FilteredResource = 0x04,
InconsistentData = 0x05,
Downcast = 0x06,
Deserialize = 0x07,
Custom = 0x08,
Io = 0x09,
InconsistentData = 0x04,
Downcast = 0x05,
Deserialize = 0x06,
Custom = 0x07,
Io = 0x08,
}

#[derive(Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -133,7 +132,6 @@ impl From<DataError> for ICU4XError {
DataErrorKind::MarkerNotFound => ICU4XError::DataMissingDataMarkerError,
DataErrorKind::IdentifierNotFound => ICU4XError::DataMissingLocaleError,
DataErrorKind::InvalidRequest => ICU4XError::DataExtraneousLocaleError,
DataErrorKind::Filtered => ICU4XError::DataFilteredResourceError,
DataErrorKind::Downcast(..) => ICU4XError::DataMismatchedTypeError,
DataErrorKind::Custom => ICU4XError::DataCustomError,
#[cfg(all(
Expand All @@ -152,7 +150,6 @@ impl From<DataError> for ICU4XDataError {
DataErrorKind::MarkerNotFound => Self::MarkerNotFound,
DataErrorKind::IdentifierNotFound => Self::IdentifierNotFound,
DataErrorKind::InvalidRequest => Self::InvalidRequest,
DataErrorKind::Filtered => Self::FilteredResource,
DataErrorKind::InconsistentData(..) => Self::InconsistentData,
DataErrorKind::Downcast(..) => Self::Downcast,
DataErrorKind::Deserialize => Self::Deserialize,
Expand Down
173 changes: 26 additions & 147 deletions provider/adapters/src/filter/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,37 @@ use super::*;
use alloc::boxed::Box;
use icu_provider::prelude::*;

use icu_locale::LanguageIdentifier;

type RequestFilterDataProviderOutput<'a, D> =
RequestFilterDataProvider<D, Box<dyn Fn(DataRequest) -> bool + Sync + 'a>>;
impl<D> FilterDataProvider<D, fn(DataIdentifierBorrowed) -> bool> {
/// Creates a [`FilterDataProvider`] that does not do any filtering.
///
/// Filters can be added using [`Self::with_filter`].
pub fn new(provider: D, filter_name: &'static str) -> Self {
Self {
inner: provider,
predicate: |_| true,
filter_name,
}
}
}

impl<D, F> RequestFilterDataProvider<D, F>
impl<D, F> FilterDataProvider<D, F>
where
F: Fn(DataRequest) -> bool + Sync,
F: Fn(DataIdentifierBorrowed) -> bool + Sync,
{
/// Filter out data requests with certain langids according to the predicate function. The
/// predicate should return `true` to allow a langid and `false` to reject a langid.
///
/// Data requests with no langid will be allowed. To reject data requests without a langid,
/// chain this with [`Self::require_langid`].
///
/// # Examples
///
/// ```
/// use icu_locale::LanguageIdentifier;
/// use icu_locale::{langid, subtags::language};
/// use icu_provider::hello_world::*;
/// use icu_provider::prelude::*;
/// use icu_provider_adapters::filter::Filterable;
/// use icu_provider_adapters::filter::FilterDataProvider;
///
/// let provider = HelloWorldProvider
/// .filterable("Demo no-English filter")
/// .filter_by_langid(|langid| langid.language != language!("en"));
/// let provider = FilterDataProvider::new(HelloWorldProvider, "Demo no-English filter")
/// .with_filter(|id| id.locale.language() != language!("en"));
///
/// // German requests should succeed:
/// let de = DataIdentifierCow::from_locale(langid!("de").into());
Expand All @@ -58,7 +62,7 @@ where
/// assert!(matches!(
/// response,
/// Err(DataError {
/// kind: DataErrorKind::Filtered,
/// kind: DataErrorKind::IdentifierNotFound,
/// ..
/// })
/// ));
Expand All @@ -70,147 +74,22 @@ where
/// assert!(available_ids.contains(&DataIdentifierCow::from_locale(langid!("de").into())));
/// assert!(!available_ids.contains(&DataIdentifierCow::from_locale(langid!("en").into())));
/// ```
pub fn filter_by_langid<'a>(
#[allow(clippy::type_complexity)]
pub fn with_filter<'a>(
self,
predicate: impl Fn(&LanguageIdentifier) -> bool + Sync + 'a,
) -> RequestFilterDataProviderOutput<'a, D>
where
F: 'a,
{
let old_predicate = self.predicate;
RequestFilterDataProvider {
inner: self.inner,
predicate: Box::new(move |request| -> bool {
if !(old_predicate)(request) {
return false;
}
predicate(&request.id.locale.get_langid())
}),
filter_name: self.filter_name,
}
}

/// Filter out data request except those having a language identifier that exactly matches
/// one in the allowlist.
///
/// This will be replaced with a smarter algorithm for locale filtering; see
/// <https://github.com/unicode-org/icu4x/issues/834>
///
/// Data requests with no langid will be allowed. To reject data requests without a langid,
/// chain this with [`Self::require_langid`].
///
/// # Examples
///
/// ```
/// use icu_locale::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::prelude::*;
/// use icu_provider_adapters::filter::Filterable;
///
/// let allowlist = [langid!("de"), langid!("zh")];
/// let provider = HelloWorldProvider
/// .filterable("Demo German+Chinese filter")
/// .filter_by_langid_allowlist_strict(&allowlist);
///
/// // German requests should succeed:
/// let de = DataIdentifierCow::from_locale(langid!("de").into());
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
/// provider.load(DataRequest {
/// id: de.as_borrowed(),
/// ..Default::default()
/// });
/// assert!(matches!(response, Ok(_)));
///
/// // English requests should fail:
/// let en = DataIdentifierCow::from_locale(langid!("en-US").into());
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
/// provider.load(DataRequest {
/// id: en.as_borrowed(),
/// ..Default::default()
/// });
/// assert!(matches!(
/// response,
/// Err(DataError {
/// kind: DataErrorKind::Filtered,
/// ..
/// })
/// ));
/// assert_eq!(
/// response.unwrap_err().str_context,
/// Some("Demo German+Chinese filter")
/// );
/// ```
pub fn filter_by_langid_allowlist_strict<'a>(
self,
allowlist: &'a [LanguageIdentifier],
) -> RequestFilterDataProviderOutput<'a, D>
where
F: 'a,
{
let old_predicate = self.predicate;
RequestFilterDataProvider {
inner: self.inner,
predicate: Box::new(move |request| -> bool {
if !(old_predicate)(request) {
return false;
}
request.id.locale.is_langid_und()
|| allowlist.contains(&request.id.locale.get_langid())
}),
filter_name: self.filter_name,
}
}

/// Require that data requests contain a langid.
///
/// # Examples
///
/// ```
/// use icu_locale::langid;
/// use icu_provider::hello_world::*;
/// use icu_provider::prelude::*;
/// use icu_provider_adapters::filter::Filterable;
///
/// let provider = HelloWorldProvider
/// .filterable("Demo require-langid filter")
/// .require_langid();
///
/// // Requests with a data id should succeed:
/// let id = DataIdentifierCow::from_locale(langid!("de").into());
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
/// provider.load(DataRequest {
/// id: id.as_borrowed(),
/// ..Default::default()
/// });
/// assert!(matches!(response, Ok(_)));
///
/// // Requests without a data should fail:
/// let response: Result<DataResponse<HelloWorldV1Marker>, _> =
/// provider.load(DataRequest {
/// id: Default::default(),
/// ..Default::default()
/// });
/// assert!(matches!(
/// response,
/// Err(DataError {
/// kind: DataErrorKind::Filtered,
/// ..
/// })
/// ));
/// ```
pub fn require_langid<'a>(self) -> RequestFilterDataProviderOutput<'a, D>
predicate: impl Fn(DataIdentifierBorrowed) -> bool + Sync + 'a,
) -> FilterDataProvider<D, Box<dyn Fn(DataIdentifierBorrowed) -> bool + Sync + 'a>>
where
F: 'a,
{
let old_predicate = self.predicate;
RequestFilterDataProvider {
FilterDataProvider {
inner: self.inner,
predicate: Box::new(move |request| -> bool {
if !(old_predicate)(request) {
predicate: Box::new(move |id| -> bool {
if !(old_predicate)(id) {
return false;
}
!request.id.locale.is_langid_und()
predicate(id)
}),
filter_name: self.filter_name,
}
Expand Down
Loading

0 comments on commit dc29c76

Please sign in to comment.