Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of deeply nested tags #113

Merged
merged 1 commit into from
Apr 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Unreleased

# 2.1.0

* Bump minimum supported Rust version to 1.30.
* Fix a potential DoS attack from pathologically nested input.

# 2.0.0

Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ammonia"
version = "2.0.0"
version = "2.1.0"
authors = ["Michael Howell <michael@notriddle.com>"]
description = "HTML Sanitization"
keywords = [ "sanitization", "html", "security", "xss" ]
Expand All @@ -11,7 +11,7 @@ repository = "https://github.com/rust-ammonia/ammonia"
categories = [ "web-programming", "text-processing" ]

[dependencies]
html5ever = "0.22"
html5ever = "0.22.8"
maplit = "1.0"
matches = "0.1.6"
tendril = "0.4"
Expand Down
48 changes: 42 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,7 @@ impl<'a> Builder<'a> {
/// without having to break Ammonia's API.
fn clean_dom(&self, mut dom: RcDom) -> Document {
let mut stack = Vec::new();
let mut removed = Vec::new();
let link_rel = self.link_rel
.map(|link_rel| format_tendril!("{}", link_rel));
if link_rel.is_some() {
Expand Down Expand Up @@ -1330,11 +1331,16 @@ impl<'a> Builder<'a> {
.into_iter()
.rev(),
);
// This design approach is used to prevent pathological content from producing
// a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`,
// of course, contains nodes that need to be dropped (we can't just drop them,
// because they could have a very deep child tree).
while let Some(mut node) = stack.pop() {
let parent = node.parent
.replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
.upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
if self.clean_node_content(&node) {
removed.push(node);
continue;
}
let pass = self.clean_child(&mut node);
Expand All @@ -1351,8 +1357,19 @@ impl<'a> Builder<'a> {
.into_iter()
.rev(),
);
if !pass {
removed.push(node);
}
}
// Now, imperatively clean up all of the child nodes.
// Otherwise, we could wind up with a DoS, either caused by a memory leak,
// or caused by a stack overflow.
while let Some(node) = stack.pop() {
removed.extend_from_slice(
&replace(&mut *node.children.borrow_mut(), Vec::new())[..]
);
}
Document(body)
Document(dom)
}

/// Returns `true` if a node and all its content should be removed.
Expand Down Expand Up @@ -1716,8 +1733,7 @@ impl<T> AttributeFilter for T where T: for<'a> Fn(&str, &str, &'a str) -> Option
/// let document = Builder::new()
/// .clean(input);
/// assert_eq!(document.to_string(), output);
#[derive(Clone)]
pub struct Document(Handle);
pub struct Document(RcDom);

impl Document {
/// Serializes a `Document` instance to a `String`.
Expand All @@ -1740,7 +1756,7 @@ impl Document {
pub fn to_string(&self) -> String {
let opts = Self::serialize_opts();
let mut ret_val = Vec::new();
serialize(&mut ret_val, &self.0, opts)
serialize(&mut ret_val, &self.0.document.children.borrow()[0], opts)
.expect("Writing to a string shouldn't fail (expect on OOM)");
String::from_utf8(ret_val)
.expect("html5ever only supports UTF8")
Expand Down Expand Up @@ -1776,7 +1792,7 @@ impl Document {
W: io::Write,
{
let opts = Self::serialize_opts();
serialize(writer, &self.0, opts)
serialize(writer, &self.0.document.children.borrow()[0], opts)
}

/// Exposes the `Document` instance as an [`html5ever::rcdom::Handle`][h].
Expand Down Expand Up @@ -1840,14 +1856,22 @@ impl Document {
/// # fn main() { do_main().unwrap() }
#[cfg(ammonia_unstable)]
pub fn to_dom_node(&self) -> Handle {
self.0.clone()
self.0.document.children.borrow()[0].clone()
}

fn serialize_opts() -> SerializeOpts {
SerializeOpts::default()
}
}

impl Clone for Document {
fn clone(&self) -> Self {
let parser = Builder::make_parser();
let dom = parser.one(&self.to_string()[..]);
Document(dom)
}
}

impl fmt::Display for Document {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.to_string())
Expand All @@ -1870,6 +1894,18 @@ impl From<Document> for String {
mod test {
use super::*;
#[test]
fn deeply_nested_whitelisted() {
clean(&"<b>".repeat(60_000));
}
#[test]
fn deeply_nested_blacklisted() {
clean(&"<b-b>".repeat(60_000));
}
#[test]
fn deeply_nested_alternating() {
clean(&"<b-b>".repeat(35_000));
}
#[test]
fn included_angles() {
let fragment = "1 < 2";
let result = clean(fragment);
Expand Down