-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
8,199 additions
and
2 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# This file is part of ICU4X. For terms of use, please see the file | ||
# called LICENSE at the top level of the ICU4X source tree | ||
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
[package] | ||
name = "resb" | ||
description = "Utilities for reading and writing ICU resource bundle files" | ||
version = "0.0.0" | ||
|
||
authors.workspace = true | ||
categories.workspace = true | ||
edition.workspace = true | ||
include.workspace = true | ||
license-file.workspace = true | ||
repository.workspace = true | ||
rust-version.workspace = true | ||
|
||
[package.metadata.workspaces] | ||
independent = true | ||
|
||
[package.metadata.docs.rs] | ||
all-features = true | ||
|
||
[dependencies] | ||
indexmap = { version = "1.9.3", optional = true } | ||
log = { version = "0.4", optional = true } | ||
nom = { version = "7.1.3", optional = true } | ||
serde = "1.0" | ||
|
||
[dev-dependencies] | ||
serde = { version = "1.0", features = ["derive"] } | ||
zerovec = { workspace = true, features = ["serde"] } | ||
|
||
[features] | ||
default = [] | ||
logging = ["dep:log"] | ||
serialize = ["std"] | ||
std = [] | ||
text = ["dep:indexmap", "dep:nom", "std"] | ||
|
||
[lib] | ||
name = "resb" | ||
|
||
[[example]] | ||
name = "genrb" | ||
required-features = ["serialize", "text"] | ||
|
||
[[example]] | ||
name = "time_zone_rule" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
UNICODE LICENSE V3 | ||
|
||
COPYRIGHT AND PERMISSION NOTICE | ||
|
||
Copyright © 2020-2023 Unicode, Inc. | ||
|
||
NOTICE TO USER: Carefully read the following legal agreement. BY | ||
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR | ||
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE | ||
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT | ||
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a | ||
copy of data files and any associated documentation (the "Data Files") or | ||
software and any associated documentation (the "Software") to deal in the | ||
Data Files or Software without restriction, including without limitation | ||
the rights to use, copy, modify, merge, publish, distribute, and/or sell | ||
copies of the Data Files or Software, and to permit persons to whom the | ||
Data Files or Software are furnished to do so, provided that either (a) | ||
this copyright and permission notice appear with all copies of the Data | ||
Files or Software, or (b) this copyright and permission notice appear in | ||
associated Documentation. | ||
|
||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | ||
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF | ||
THIRD PARTY RIGHTS. | ||
|
||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE | ||
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, | ||
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | ||
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | ||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA | ||
FILES OR SOFTWARE. | ||
|
||
Except as contained in this notice, the name of a copyright holder shall | ||
not be used in advertising or otherwise to promote the sale, use or other | ||
dealings in these Data Files or Software without prior written | ||
authorization of the copyright holder. | ||
|
||
— | ||
|
||
Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. | ||
ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use std::{ | ||
fs::File, | ||
io::{BufReader, BufWriter, Read, Write}, | ||
}; | ||
|
||
use resb::{binary, text}; | ||
|
||
fn main() { | ||
let input = File::open("examples/data/zoneinfo64.txt"); | ||
let mut reader = BufReader::new(input.unwrap()); | ||
|
||
let mut in_string = String::new(); | ||
match reader.read_to_string(&mut in_string) { | ||
Ok(_) => (), | ||
Err(err) => panic!("Unable to read file: {}", err), | ||
}; | ||
|
||
let (in_bundle, keys_in_discovery_order) = match text::Reader::read(&in_string) { | ||
Ok(result) => result, | ||
Err(err) => panic!("Failed to parse text bundle:\n{err}"), | ||
}; | ||
|
||
let file = File::create("examples/data/zoneinfo64.res"); | ||
let mut writer = BufWriter::new(file.unwrap()); | ||
|
||
let bytes = binary::Serializer::to_bytes(&in_bundle, &keys_in_discovery_order) | ||
.expect("Failed to generate binary bundle."); | ||
|
||
writer | ||
.write_all(&bytes) | ||
.expect("Failed to write binary bundle."); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use std::{ | ||
char::DecodeUtf16Error, | ||
collections::HashMap, | ||
fmt::Debug, | ||
fs::File, | ||
io::{BufReader, Read}, | ||
marker::PhantomData, | ||
}; | ||
|
||
use serde::{ | ||
de::{self, Visitor}, | ||
Deserialize, Serialize, | ||
}; | ||
|
||
use zerovec::ZeroVec; | ||
|
||
/// A zero-copy representation of a little-endian UTF-16 string. | ||
/// | ||
/// Unlike `String`, the contents are not required to be valid UTF-16. Consumers | ||
/// are expected to validate the contents or use `try_into::<String>()`. No zero | ||
/// terminator is included. | ||
#[derive(Deserialize, Serialize)] | ||
#[serde(transparent)] | ||
pub struct ZeroUTF16String<'a> { | ||
#[serde(borrow)] | ||
units: ZeroVec<'a, u16>, | ||
} | ||
|
||
impl ZeroUTF16String<'_> { | ||
/// Gets whether the UTF-16 string is empty. | ||
pub fn is_empty(&self) -> bool { | ||
self.units.is_empty() | ||
} | ||
|
||
/// Gets the count of units in the string. | ||
/// | ||
/// This value does not necessarily equal the length of the string in | ||
/// characters, as characters outside the Basic Multilingual Plane are | ||
/// represented by 2 units. | ||
pub fn len(&self) -> usize { | ||
self.units.len() | ||
} | ||
|
||
/// Gets an iterator for the units of the string. | ||
/// | ||
/// See `len` for details on why this does not correspond to characters. | ||
pub fn iter(&self) -> impl Iterator<Item = u16> + '_ { | ||
self.units.iter() | ||
} | ||
} | ||
|
||
impl TryFrom<ZeroUTF16String<'_>> for String { | ||
type Error = DecodeUtf16Error; | ||
|
||
fn try_from(value: ZeroUTF16String<'_>) -> Result<Self, Self::Error> { | ||
char::decode_utf16(value.iter()).collect::<Result<String, _>>() | ||
} | ||
} | ||
|
||
impl Debug for ZeroUTF16String<'_> { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
let decoded = char::decode_utf16(self.iter()) | ||
.map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)) | ||
.collect::<String>(); | ||
write!(f, "{}", decoded) | ||
} | ||
} | ||
|
||
#[derive(Debug, Deserialize, Serialize)] | ||
#[serde(rename_all = "camelCase")] | ||
pub struct TzDataRuleData<'a> { | ||
#[serde(borrow)] | ||
type_offsets: ZeroVec<'a, i32>, | ||
#[serde(borrow)] | ||
trans: Option<ZeroVec<'a, i32>>, | ||
#[serde(borrow)] | ||
trans_pre32: Option<ZeroVec<'a, i32>>, | ||
#[serde(borrow)] | ||
trans_post32: Option<ZeroVec<'a, i32>>, | ||
type_map: Option<&'a [u8]>, | ||
#[serde(borrow)] | ||
final_rule: Option<ZeroUTF16String<'a>>, | ||
final_raw: Option<i32>, | ||
final_year: Option<u32>, | ||
#[serde(borrow)] | ||
links: Option<ZeroVec<'a, u32>>, | ||
} | ||
|
||
#[derive(Debug)] | ||
pub enum TzDataRule<'a> { | ||
// The rule data is boxed here due to the large size difference between the | ||
// `TzDataRuleData` struct and `u32`. It's not strictly necessary. | ||
Table(Box<TzDataRuleData<'a>>), | ||
Int(u32), | ||
} | ||
|
||
impl<'de: 'a, 'a> Deserialize<'de> for TzDataRule<'a> { | ||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> | ||
where | ||
D: serde::Deserializer<'de>, | ||
{ | ||
deserializer.deserialize_any(TzDataRuleEnumVisitor { | ||
phantom: PhantomData, | ||
}) | ||
} | ||
} | ||
|
||
struct TzDataRuleEnumVisitor<'a> { | ||
phantom: PhantomData<TzDataRule<'a>>, | ||
} | ||
|
||
impl<'de: 'a, 'a> Visitor<'de> for TzDataRuleEnumVisitor<'a> { | ||
type Value = TzDataRule<'a>; | ||
|
||
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { | ||
formatter.write_str("an unsigned 32-bit integer or a table of rule data") | ||
} | ||
|
||
fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E> | ||
where | ||
E: serde::de::Error, | ||
{ | ||
Ok(TzDataRule::Int(v)) | ||
} | ||
|
||
fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error> | ||
where | ||
A: serde::de::MapAccess<'de>, | ||
{ | ||
let value = TzDataRuleData::deserialize(de::value::MapAccessDeserializer::new(map))?; | ||
|
||
Ok(TzDataRule::Table(Box::new(value))) | ||
} | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
#[serde(rename = "zoneinfo64")] | ||
#[serde(rename_all = "PascalCase")] | ||
pub struct ZoneInfo64<'a> { | ||
#[serde(borrow)] | ||
pub zones: Vec<TzDataRule<'a>>, | ||
#[serde(borrow)] | ||
pub names: Vec<ZeroUTF16String<'a>>, | ||
#[serde(borrow)] | ||
pub rules: HashMap<&'a str, ZeroVec<'a, i32>>, | ||
#[serde(borrow)] | ||
pub regions: Vec<ZeroUTF16String<'a>>, | ||
} | ||
|
||
fn main() { | ||
let input = File::open("examples/data/zoneinfo64.res"); | ||
let mut reader = BufReader::new(input.unwrap()); | ||
|
||
let mut in_bytes = Vec::new(); | ||
reader | ||
.read_to_end(&mut in_bytes) | ||
.expect("Unable to read resource bundle file"); | ||
|
||
let out = resb::binary::from_bytes::<ZoneInfo64>(&in_bytes) | ||
.expect("Error processing resource bundle file"); | ||
|
||
println!("{:#?}", out); | ||
} |
Oops, something went wrong.