Skip to content

Commit

Permalink
Fix/buffer base64 url decoding (#777)
Browse files Browse the repository at this point in the history
* Add support for decoding URL safe base64 encoded strings

* Fix base64 url decoding

* Remove unneeded return

---------

Co-authored-by: Ainsley Rutterford <ainsley.rutterford@gmail.com>
  • Loading branch information
richarddavison and ainsleyrutterford authored Jan 10, 2025
1 parent 47baedc commit 5fd46ac
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 18 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions libs/llrt_encoding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ repository = "https://github.com/awslabs/llrt"
base64-simd = "0.8"
hex-simd = "0.8"
phf = { version = "0.11", features = ["macros"] }
memchr = "2"

[build-dependencies]
llrt_build = { version = "0.4.0-beta", path = "../llrt_build" }
38 changes: 28 additions & 10 deletions libs/llrt_encoding/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ impl Encoder {

pub fn decode<'a, T: Into<Cow<'a, [u8]>>>(&self, bytes: T) -> Result<Vec<u8>, String> {
match self {
Self::Hex => bytes_from_hex(&bytes.into()),
Self::Base64 => bytes_from_b64(&bytes.into()),
Self::Hex => bytes_from_hex(bytes),
Self::Base64 => bytes_from_b64(bytes),
Self::Utf8 | Self::Windows1252 | Self::Utf16le | Self::Utf16be => {
Ok(bytes.into().into())
},
Expand All @@ -99,8 +99,8 @@ impl Encoder {

pub fn decode_from_string(&self, string: String) -> Result<Vec<u8>, String> {
match self {
Self::Hex => bytes_from_hex(string.as_bytes()),
Self::Base64 => bytes_from_b64(string.as_bytes()),
Self::Hex => bytes_from_hex(string.into_bytes()),
Self::Base64 => bytes_from_b64(string.into_bytes()),
Self::Utf8 | Self::Windows1252 => Ok(string.into_bytes()),
Self::Utf16le => Ok(string
.encode_utf16()
Expand Down Expand Up @@ -129,16 +129,34 @@ pub fn bytes_to_hex(bytes: &[u8]) -> Vec<u8> {
hex_simd::encode_type(bytes, AsciiCase::Lower)
}

pub fn bytes_from_hex(hex_bytes: &[u8]) -> Result<Vec<u8>, String> {
hex_simd::decode_to_vec(hex_bytes).map_err(|err| err.to_string())
/// Decodes hex-encoded input into raw bytes.
///
/// Accepts anything convertible into a `Cow<[u8]>`, so callers can hand over
/// either a borrowed slice or an owned buffer.
///
/// # Errors
///
/// Returns the `hex_simd` decoder's error message as a `String` when decoding fails.
pub fn bytes_from_hex<'a, T: Into<Cow<'a, [u8]>>>(hex_bytes: T) -> Result<Vec<u8>, String> {
    let input: Cow<'a, [u8]> = hex_bytes.into();
    match hex_simd::decode_to_vec(input) {
        Ok(decoded) => Ok(decoded),
        Err(err) => Err(err.to_string()),
    }
}

pub fn bytes_to_b64_string(bytes: &[u8]) -> String {
base64_simd::STANDARD.encode_to_string(bytes)
/// Decodes base64 input into raw bytes, accepting both the standard (`+`, `/`)
/// and the URL-safe (`-`, `_`) alphabets.
///
/// URL-safe characters are rewritten to their standard counterparts before the
/// buffer is handed to `base64_simd::forgiving_decode_to_vec`. Borrowed input is
/// copied only when such a rewrite is actually needed; owned input is rewritten
/// in place.
///
/// # Errors
///
/// Returns the decoder's error message as a `String` when decoding fails.
pub fn bytes_from_b64<'a, T: Into<Cow<'a, [u8]>>>(base64_bytes: T) -> Result<Vec<u8>, String> {
    let mut bytes: Cow<'a, [u8]> = base64_bytes.into();

    // Look for the first URL-safe character. When none is present the input is
    // decoded as-is, without copying or allocating anything extra.
    if let Some(first) = memchr::memchr2(b'-', b'_', &bytes) {
        // `to_mut` clones borrowed data once and is a no-op for owned data.
        // Everything before `first` is already free of URL-safe characters.
        for byte in &mut bytes.to_mut()[first..] {
            *byte = match *byte {
                b'-' => b'+',
                b'_' => b'/',
                other => other,
            };
        }
    }

    base64_simd::forgiving_decode_to_vec(&bytes).map_err(|e| e.to_string())
}

pub fn bytes_from_b64(bytes: &[u8]) -> Result<Vec<u8>, String> {
base64_simd::forgiving_decode_to_vec(bytes).map_err(|e| e.to_string())
/// Encodes `bytes` with the `base64_simd::STANDARD` encoder and returns the
/// result as an owned `String`.
pub fn bytes_to_b64_string(bytes: &[u8]) -> String {
    let engine = &base64_simd::STANDARD;
    engine.encode_to_string(bytes)
}

pub fn bytes_to_b64(bytes: &[u8]) -> Vec<u8> {
Expand Down
5 changes: 3 additions & 2 deletions modules/llrt_buffer/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl<'js> Buffer {
) -> Result<Value<'js>> {
if let Some(encoding) = encoding {
let encoder = Encoder::from_str(&encoding).or_throw(ctx)?;
bytes = encoder.decode(&bytes).or_throw(ctx)?;
bytes = encoder.decode(bytes).or_throw(ctx)?;
}
Buffer(bytes).into_js(ctx)
}
Expand Down Expand Up @@ -284,7 +284,7 @@ fn from<'js>(
== Some(stringify!(Buffer))
|| encoding.is_some()
{
let bytes = bytes.to_vec();
let bytes = bytes.into();
return Buffer::from_encoding(&ctx, bytes, encoding)?.into_js(&ctx);
} else {
let (array_buffer, _, source_offset) = ab_bytes.get_array_buffer()?.unwrap(); //we know it's an array buffer
Expand Down Expand Up @@ -328,6 +328,7 @@ fn set_prototype<'js>(ctx: &Ctx<'js>, constructor: Object<'js>) -> Result<()> {
}

pub fn atob(ctx: Ctx<'_>, encoded_value: Coerced<String>) -> Result<rquickjs::String<'_>> {
//fine to pass a slice here since we won't copy if not base64
let vec = bytes_from_b64(encoded_value.as_bytes()).or_throw(&ctx)?;
// SAFETY: QuickJS will replace invalid characters with U+FFFD
let str = unsafe { String::from_utf8_unchecked(vec) };
Expand Down
10 changes: 5 additions & 5 deletions modules/llrt_http/src/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ where
async move {
let options = options?;

if options.url.starts_with("data:") {
return parse_data_url(&ctx, &options.url);
if let Some(data_url) = options.url.strip_prefix("data:") {
return parse_data_url(&ctx, data_url);
}

let initial_uri: Uri = options.url.parse().or_throw(&ctx)?;
Expand Down Expand Up @@ -112,7 +112,7 @@ where
}

fn parse_data_url<'js>(ctx: &Ctx<'js>, data_url: &str) -> Result<Response<'js>> {
let (mime_type, data) = data_url["data:".len()..]
let (mime_type, data) = data_url
.split_once(',')
.ok_or_else(|| Exception::throw_type(ctx, "Invalid data URL format"))?;

Expand All @@ -139,9 +139,9 @@ fn parse_data_url<'js>(ctx: &Ctx<'js>, data_url: &str) -> Result<Response<'js>>
};

let body = if is_base64 {
bytes_from_b64(data.as_bytes()).map_err(|err| Exception::throw_message(ctx, &err))?
bytes_from_b64(data.as_bytes()).or_throw(ctx)?
} else {
data.as_bytes().to_vec()
data.as_bytes().into()
};

let blob = Blob::from_bytes(body, Some(content_type.clone())).into_js(ctx)?;
Expand Down
22 changes: 21 additions & 1 deletion tests/unit/buffer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,34 @@ describe("Buffer.from", () => {
it("should create a buffer from a string with base64 encoding", () => {
  // The same payload with and without trailing "=" padding.
  const cases: [string, string][] = [
    ["SGVsbG8sIHdvcmxkIQ==", "Hello, world!"],
    ["SGVsbG8sIHdvcmxkIQ", "Hello, world!"],
  ];

  for (const [encoded, expected] of cases) {
    const decoded = Buffer.from(encoded, "base64");
    expect(decoded.toString()).toEqual(expected);
  }
});

it("should create a buffer from a string with base64 encoding that contains / or +", () => {
  // Inputs that use the "+" and "/" characters of the standard alphabet.
  const cases: [string, string][] = [
    ["PD8+MTIz", "<?>123"],
    ["PD8/PjEyMw==", "<??>123"],
  ];

  for (const [encoded, expected] of cases) {
    const decoded = Buffer.from(encoded, "base64");
    expect(decoded.toString()).toEqual(expected);
  }
});

// https://en.wikipedia.org/wiki/Base64#URL_applications
it("should create a buffer from a string with URL safe base64 encoding that contains _ or -", () => {
  // Inputs that use "-" and "_" from the URL-safe alphabet; the second one
  // also omits the trailing padding.
  const cases: [string, string][] = [
    ["PD8-MTIz", "<?>123"],
    ["PD8_PjEyMw", "<??>123"],
  ];

  for (const [encoded, expected] of cases) {
    const decoded = Buffer.from(encoded, "base64");
    expect(decoded.toString()).toEqual(expected);
  }
});

it("should create a buffer from a string with hex encoding", () => {
const input = "48656c6c6f2c20776f726c6421";
const buffer = Buffer.from(input, "hex");
Expand Down

0 comments on commit 5fd46ac

Please sign in to comment.