Skip to content

Commit

Permalink
Merge branch 'main' into sub
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Oct 20, 2023
2 parents d53e4fc + 35212bf commit 0d7893c
Show file tree
Hide file tree
Showing 19 changed files with 8,199 additions and 2 deletions.
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ members = [
"utils/fixed_decimal",
"utils/litemap",
"utils/pattern",
"utils/resb",
"utils/tinystr",
"utils/tzif",
"utils/writeable",
Expand Down
49 changes: 49 additions & 0 deletions utils/resb/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "resb"
description = "Utilities for reading and writing ICU resource bundle files"
version = "0.0.0"

authors.workspace = true
categories.workspace = true
edition.workspace = true
include.workspace = true
license-file.workspace = true
repository.workspace = true
rust-version.workspace = true

[package.metadata.workspaces]
independent = true

[package.metadata.docs.rs]
all-features = true

[dependencies]
indexmap = { version = "1.9.3", optional = true }
log = { version = "0.4", optional = true }
nom = { version = "7.1.3", optional = true }
serde = "1.0"

[dev-dependencies]
serde = { version = "1.0", features = ["derive"] }
zerovec = { workspace = true, features = ["serde"] }

[features]
default = []
logging = ["dep:log"]
serialize = ["std"]
std = []
text = ["dep:indexmap", "dep:nom", "std"]

[lib]
name = "resb"

[[example]]
name = "genrb"
required-features = ["serialize", "text"]

[[example]]
name = "time_zone_rule"
44 changes: 44 additions & 0 deletions utils/resb/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
UNICODE LICENSE V3

COPYRIGHT AND PERMISSION NOTICE

Copyright © 2020-2023 Unicode, Inc.

NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.

Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.

IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.


Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
17 changes: 17 additions & 0 deletions utils/resb/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added utils/resb/examples/data/zoneinfo64.res
Binary file not shown.
3,547 changes: 3,547 additions & 0 deletions utils/resb/examples/data/zoneinfo64.txt

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions utils/resb/examples/genrb.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use std::{
fs::File,
io::{BufReader, BufWriter, Read, Write},
};

use resb::{binary, text};

/// Converts the text-format `zoneinfo64` bundle into its binary form.
///
/// Reads `examples/data/zoneinfo64.txt`, parses it with `text::Reader`,
/// serializes the result with `binary::Serializer` and writes the bytes to
/// `examples/data/zoneinfo64.res`. Both paths are relative to the current
/// working directory.
///
/// # Panics
///
/// Panics if the input cannot be opened, read or parsed, if serialization
/// fails, or if the output cannot be created, written or flushed.
fn main() {
    let input = File::open("examples/data/zoneinfo64.txt")
        .expect("Unable to open examples/data/zoneinfo64.txt");
    let mut reader = BufReader::new(input);

    let mut in_string = String::new();
    reader
        .read_to_string(&mut in_string)
        .unwrap_or_else(|err| panic!("Unable to read file: {}", err));

    let (in_bundle, keys_in_discovery_order) = text::Reader::read(&in_string)
        .unwrap_or_else(|err| panic!("Failed to parse text bundle:\n{err}"));

    // Serialize before touching the output path: `File::create` truncates an
    // existing file, so a serialization failure must not leave an empty bundle
    // behind.
    let bytes = binary::Serializer::to_bytes(&in_bundle, &keys_in_discovery_order)
        .expect("Failed to generate binary bundle.");

    let file = File::create("examples/data/zoneinfo64.res")
        .expect("Unable to create examples/data/zoneinfo64.res");
    let mut writer = BufWriter::new(file);

    writer
        .write_all(&bytes)
        .expect("Failed to write binary bundle.");

    // Dropping a `BufWriter` ignores flush errors, so flush explicitly to
    // surface a failed write instead of silently producing a short file.
    writer
        .flush()
        .expect("Failed to flush binary bundle to disk.");
}
167 changes: 167 additions & 0 deletions utils/resb/examples/time_zone_rule.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use std::{
char::DecodeUtf16Error,
collections::HashMap,
fmt::Debug,
fs::File,
io::{BufReader, Read},
marker::PhantomData,
};

use serde::{
de::{self, Visitor},
Deserialize, Serialize,
};

use zerovec::ZeroVec;

/// A zero-copy representation of a little-endian UTF-16 string.
///
/// Unlike `String`, the contents are not required to be valid UTF-16. Consumers
/// are expected to validate the contents themselves or convert with
/// `String::try_from`, which reports invalid data as a `DecodeUtf16Error`. No
/// zero terminator is included.
///
/// Because of `#[serde(transparent)]`, the type is (de)serialized exactly like
/// its single `units` field rather than as a one-field struct.
#[derive(Deserialize, Serialize)]
#[serde(transparent)]
pub struct ZeroUTF16String<'a> {
// The UTF-16 code units; `borrow` lets the `ZeroVec` borrow from the
// deserializer's input for `'a`.
#[serde(borrow)]
units: ZeroVec<'a, u16>,
}

// Read-only accessors; each one delegates directly to the underlying `ZeroVec`.
impl ZeroUTF16String<'_> {
/// Gets whether the UTF-16 string is empty, i.e. contains no code units.
pub fn is_empty(&self) -> bool {
self.units.is_empty()
}

/// Gets the count of 16-bit code units in the string.
///
/// This value does not necessarily equal the length of the string in
/// characters, as characters outside the Basic Multilingual Plane are
/// represented by 2 units (a surrogate pair).
pub fn len(&self) -> usize {
self.units.len()
}

/// Gets an iterator over the code units of the string, yielded by value.
///
/// The units are not validated or decoded. See `len` for details on why
/// they do not correspond one-to-one to characters.
pub fn iter(&self) -> impl Iterator<Item = u16> + '_ {
self.units.iter()
}
}

/// Strict conversion into an owned `String`.
///
/// Decoding stops at the first invalid unit (an unpaired surrogate) and that
/// error is returned; no partially decoded string is produced.
impl TryFrom<ZeroUTF16String<'_>> for String {
    type Error = DecodeUtf16Error;

    fn try_from(value: ZeroUTF16String<'_>) -> Result<Self, Self::Error> {
        let mut decoded = String::new();
        for unit in char::decode_utf16(value.iter()) {
            // `?` bails out on the first undecodable unit.
            decoded.push(unit?);
        }
        Ok(decoded)
    }
}

/// Lossy, human-readable rendering of the string.
///
/// Units that cannot be decoded are shown as U+FFFD REPLACEMENT CHARACTER. The
/// text is written without surrounding quotes or escaping.
impl Debug for ZeroUTF16String<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut rendered = String::new();
        for unit in char::decode_utf16(self.iter()) {
            rendered.push(unit.unwrap_or(char::REPLACEMENT_CHARACTER));
        }
        f.write_str(&rendered)
    }
}

/// Table form of a zone entry, wrapped by `TzDataRule::Table`.
///
/// `rename_all = "camelCase"` maps every field to its camelCase key during
/// (de)serialization, e.g. `type_offsets` is read from `typeOffsets` and
/// `trans_pre32` from `transPre32`. Only `type_offsets` is mandatory; every
/// other field is an `Option`.
///
/// All slice-like fields borrow from the deserializer input for `'a` instead
/// of copying.
// NOTE(review): the meaning of the individual fields is not established in
// this file; presumably they mirror ICU's zoneinfo64 resource layout — confirm
// against the ICU documentation before relying on them.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TzDataRuleData<'a> {
#[serde(borrow)]
type_offsets: ZeroVec<'a, i32>,
#[serde(borrow)]
trans: Option<ZeroVec<'a, i32>>,
#[serde(borrow)]
trans_pre32: Option<ZeroVec<'a, i32>>,
#[serde(borrow)]
trans_post32: Option<ZeroVec<'a, i32>>,
// No explicit `borrow` attribute here: serde borrows `&[u8]` (and `&str`)
// fields implicitly.
type_map: Option<&'a [u8]>,
#[serde(borrow)]
final_rule: Option<ZeroUTF16String<'a>>,
final_raw: Option<i32>,
final_year: Option<u32>,
#[serde(borrow)]
links: Option<ZeroVec<'a, u32>>,
}

/// One entry of the `zones` list in `ZoneInfo64`: either a full table of rule
/// data or a bare unsigned integer.
///
/// `Deserialize` is implemented by hand for this type so that the variant is
/// picked from the shape of the incoming value rather than from a tag.
// NOTE(review): the integer form presumably refers to another zone entry by
// index (an alias) — confirm against the zoneinfo64 format.
#[derive(Debug)]
pub enum TzDataRule<'a> {
// The rule data is boxed here due to the large size difference between the
// `TzDataRuleData` struct and `u32`. It's not strictly necessary.
Table(Box<TzDataRuleData<'a>>),
Int(u32),
}

impl<'de: 'a, 'a> Deserialize<'de> for TzDataRule<'a> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
deserializer.deserialize_any(TzDataRuleEnumVisitor {
phantom: PhantomData,
})
}
}

/// Serde visitor that produces a `TzDataRule` from either an integer or a map.
///
/// The visitor carries no runtime data; `phantom` only ties it to the `'a`
/// lifetime of the value it builds.
struct TzDataRuleEnumVisitor<'a> {
phantom: PhantomData<TzDataRule<'a>>,
}

impl<'de: 'a, 'a> Visitor<'de> for TzDataRuleEnumVisitor<'a> {
type Value = TzDataRule<'a>;

fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("an unsigned 32-bit integer or a table of rule data")
}

fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(TzDataRule::Int(v))
}

fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
where
A: serde::de::MapAccess<'de>,
{
let value = TzDataRuleData::deserialize(de::value::MapAccessDeserializer::new(map))?;

Ok(TzDataRule::Table(Box::new(value)))
}
}

/// Top-level structure of the `zoneinfo64` resource bundle.
///
/// The serde name of the type is `zoneinfo64`, and `rename_all = "PascalCase"`
/// maps the fields to the keys `Zones`, `Names`, `Rules` and `Regions`. All
/// contents borrow from the deserializer input for `'a`.
// NOTE(review): how the four collections relate to each other (e.g. whether
// `names` and `zones` are parallel lists) is not shown in this file — confirm
// against the zoneinfo64 format before relying on it.
#[derive(Debug, Deserialize)]
#[serde(rename = "zoneinfo64")]
#[serde(rename_all = "PascalCase")]
pub struct ZoneInfo64<'a> {
#[serde(borrow)]
pub zones: Vec<TzDataRule<'a>>,
#[serde(borrow)]
pub names: Vec<ZeroUTF16String<'a>>,
#[serde(borrow)]
pub rules: HashMap<&'a str, ZeroVec<'a, i32>>,
#[serde(borrow)]
pub regions: Vec<ZeroUTF16String<'a>>,
}

/// Loads the binary `zoneinfo64` bundle and pretty-prints its contents.
///
/// Reads `examples/data/zoneinfo64.res` (relative to the current working
/// directory) into memory, deserializes it into `ZoneInfo64` with
/// `resb::binary::from_bytes` and prints the result using the alternate
/// `Debug` format.
///
/// # Panics
///
/// Panics if the file cannot be opened or read, or if the bytes cannot be
/// deserialized into `ZoneInfo64`.
fn main() {
    // Name the file in the panic message; a bare `unwrap` only reports the OS
    // error without saying which path was involved.
    let input = File::open("examples/data/zoneinfo64.res")
        .expect("Unable to open examples/data/zoneinfo64.res");
    let mut reader = BufReader::new(input);

    // `out` borrows from this buffer, so it must stay alive until printing is
    // done.
    let mut in_bytes = Vec::new();
    reader
        .read_to_end(&mut in_bytes)
        .expect("Unable to read resource bundle file");

    let out = resb::binary::from_bytes::<ZoneInfo64>(&in_bytes)
        .expect("Error processing resource bundle file");

    println!("{:#?}", out);
}
Loading

0 comments on commit 0d7893c

Please sign in to comment.