This repository has been archived by the owner on Nov 16, 2020. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
241 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,98 @@ | ||
import Foundation | ||
|
||
extension String {
    /// Lookup table mapping the characters that need escaping to their escaped representation.
    private static let htmlEscapeMap: [UInt8: String] = [
        UInt8(ascii: "&"): "&amp;",
        UInt8(ascii: "\""): "&quot;",
        UInt8(ascii: "'"): "&#39;",
        UInt8(ascii: "<"): "&lt;",
        UInt8(ascii: ">"): "&gt;"]

    /// Stores up to eight bytes inline to avoid the memory overhead of using `[UInt8]`.
    private struct InlineByteArray {
        /// Raw storage; only the first `count` bytes (in memory order) are meaningful.
        private(set) var eightBytes: Int64 = 0
        /// Number of meaningful bytes stored in `eightBytes`.
        private(set) var count: Int

        /// Creates an inline copy of `bytes`.
        ///
        /// - Parameter bytes: The bytes to store; must contain at most eight elements.
        init(bytes: [UInt8]) {
            precondition(bytes.count <= 8, "InlineByteArray can hold at most 8 bytes")
            self.count = bytes.count
            // Scope the pointer to the closure instead of letting a pointer to `self` escape.
            Swift.withUnsafeMutableBytes(of: &eightBytes) { storage in
                storage.copyBytes(from: bytes)
            }
        }

        /// Copies the `count` stored bytes to `destination`.
        ///
        /// - Parameter destination: Start of a writable region of at least `count` bytes.
        func copyBytes(to destination: UnsafeMutableRawPointer) {
            var storage = eightBytes
            Swift.withUnsafeBytes(of: &storage) { source in
                // `source` spans the eight bytes of an `Int64`, so its base address is never nil.
                destination.copyMemory(from: source.baseAddress!, byteCount: count)
            }
        }
    }

    /// Sixteen bytes of zero-initialized scratch space used to build short results without a heap allocation.
    private struct SixteenBytes {
        var firstEight: Int64 = 0
        var secondEight: Int64 = 0

        init() { }
    }

    /// Same as `htmlEscapeMap`, but stored as an array indexed from 0 to 255 to avoid dictionary lookups.
    /// In addition, we store `InlineByteArray`s instead of `String`s in order to avoid memory management overhead.
    /// If no escaping is required for a character, the character itself is stored.
    /// Using an array-typed lookup table is much faster than a dictionary-typed one or `if`-based branching.
    private static let htmlEscapeMapASCIIByteArray: [InlineByteArray] = (UInt8(0)...UInt8(255)).map { byte in
        if let escaped = String.htmlEscapeMap[byte] {
            return InlineByteArray(bytes: Array(escaped.utf8))
        } else {
            return InlineByteArray(bytes: [byte])
        }
    }

    /// Escapes HTML entities in a `String`.
    ///
    /// The characters `&`, `"`, `'`, `<` and `>` are replaced by their entity
    /// representation; every other UTF-8 code unit is copied through unchanged.
    ///
    /// - Returns: The escaped string, or `self` when nothing needs escaping.
    public func htmlEscaped() -> String {
        // First pass: compute the length of the escaped output.
        // Using `withUnsafeBufferPointer` is minimally faster than subscripting the array for each character.
        let expectedLength = String.htmlEscapeMapASCIIByteArray.withUnsafeBufferPointer { lengths -> Int in
            var total = 0
            for character in self.utf8 {
                total += lengths[Int(character)].count
            }
            return total
        }

        guard expectedLength != self.utf8.count else {
            // Shortcut: no replacements necessary; skip them altogether.
            return self
        }

        /// Second pass: writes exactly `expectedLength` escaped bytes starting at `resultBytes`.
        func writeEscapedString(_ resultBytes: UnsafeMutableRawPointer) {
            var raw = resultBytes
            let end = raw + expectedLength
            for character in self.utf8 {
                let escaped = String.htmlEscapeMapASCIIByteArray[Int(character)]
                // The final write ends exactly at `end`, hence `<=` rather than `<`.
                assert(raw + escaped.count <= end)
                escaped.copyBytes(to: raw)
                raw += escaped.count
            }
        }

        if expectedLength <= 15 {
            // Avoid the `[UInt8]` heap allocation for strings consisting
            // of at most 15 UTF-8 code units, where `String`'s small string
            // optimization avoids a memory allocation.
            var resultData = SixteenBytes()
            return Swift.withUnsafeMutableBytes(of: &resultData) { buffer -> String in
                // `buffer` spans the 16 bytes of `resultData`, so its base address is never nil.
                writeEscapedString(buffer.baseAddress!)
                // Decode with an explicit length instead of relying on NUL termination,
                // so input containing a NUL byte is not truncated.
                return String(decoding: buffer.prefix(expectedLength), as: UTF8.self)
            }
        } else {
            var resultData = [UInt8](repeating: 0, count: expectedLength)
            resultData.withUnsafeMutableBytes {
                // `expectedLength` is greater than 15 here, so the buffer is non-empty.
                writeEscapedString($0.baseAddress!)
            }

            // The input is valid UTF-8 and only ASCII bytes are substituted,
            // so decoding never has to repair anything.
            return String(decoding: resultData, as: UTF8.self)
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
import XCTest | ||
@testable import TemplateKitTests | ||
|
||
XCTMain([ | ||
testCase(TemplateDataEncoderTests.allTests), | ||
]) | ||
import TemplateKitTests | ||
|
||
// Gather every test case entry exported by the `TemplateKitTests` module
// and hand the complete list to the XCTest runner.
let tests: [XCTestCaseEntry] = TemplateKitTests.__allTests()

XCTMain(tests)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
@testable import TemplateKit | ||
import XCTest | ||
|
||
/// Correctness and performance tests for `String.htmlEscaped()`.
class HTMLEscapeTests: XCTestCase {
    /// Verifies the escaped output for empty, unescaped, short, long and non-ASCII inputs.
    func testCorrectness() {
        XCTAssertEqual("".htmlEscaped(), "")
        XCTAssertEqual("abcdef".htmlEscaped(), "abcdef")
        XCTAssertEqual("abc&<>\"'".htmlEscaped(), "abc&amp;&lt;&gt;&quot;&#39;")
        XCTAssertEqual("abc&".htmlEscaped(), "abc&amp;")
        // Result longer than 15 bytes.
        XCTAssertEqual(
            "<script>alert(\"x\")</script>".htmlEscaped(),
            "&lt;script&gt;alert(&quot;x&quot;)&lt;/script&gt;")
        // Non-ASCII code units must pass through unchanged.
        XCTAssertEqual("é<".htmlEscaped(), "é&lt;")
    }

    /// Measures escaping a short string that needs no replacements.
    func testShortStringNoReplacements() {
        let string = "abcde12345"
        measure {
            for _ in 0..<10_000_000 {
                _ = string.htmlEscaped()
            }
        }
    }

    /// Measures escaping a short string that needs replacements.
    func testShortStringWithReplacements() {
        // The result should still fit into 15 bytes to hit the in-place String storage optimization.
        let string = "<abcdef>"
        measure {
            for _ in 0..<1_000_000 {
                _ = string.htmlEscaped()
            }
        }
    }

    /// The 62 ASCII letters and digits; contains nothing that needs escaping.
    static let mediumStringNoReplacements: String = {
        let lowercase = Array(UInt8(ascii: "a")...UInt8(ascii: "z"))
        let digits = Array(UInt8(ascii: "0")...UInt8(ascii: "9"))
        let uppercase = Array(UInt8(ascii: "A")...UInt8(ascii: "Z"))

        return String(decoding: lowercase + digits + uppercase, as: UTF8.self)
    }()

    /// Measures escaping a medium-length string that needs no replacements.
    func testMediumStringNoReplacements() {
        measure {
            for _ in 0..<2_000_000 {
                _ = HTMLEscapeTests.mediumStringNoReplacements.htmlEscaped()
            }
        }
    }

    /// The ASCII letters and digits interleaved with each of the five escaped characters.
    static let mediumStringWithReplacements: String = {
        let lowercase = Array(UInt8(ascii: "a")...UInt8(ascii: "z"))
        let digits = Array(UInt8(ascii: "0")...UInt8(ascii: "9"))
        let uppercase = Array(UInt8(ascii: "A")...UInt8(ascii: "Z"))
        let allCharacters = [
            [UInt8(ascii: "&")], lowercase,
            [UInt8(ascii: "\"")], digits,
            [UInt8(ascii: "'")], uppercase,
            [UInt8(ascii: "<")], [UInt8(ascii: ">")]]
            .flatMap { $0 }

        return String(decoding: allCharacters, as: UTF8.self)
    }()

    /// Measures escaping a medium-length string that needs a few replacements.
    func testMediumStringWithReplacements() {
        measure {
            for _ in 0..<500_000 {
                _ = HTMLEscapeTests.mediumStringWithReplacements.htmlEscaped()
            }
        }
    }

    /// Measures escaping a string in which every character needs a replacement.
    func testMediumStringWithOnlyReplacements() {
        let string = Array(repeating: "&<>\"'", count: 10).joined(separator: "")
        measure {
            for _ in 0..<500_000 {
                _ = string.htmlEscaped()
            }
        }
    }

    /// Measures escaping a long string that needs no replacements.
    func testLongStringNoReplacements() {
        let longString = Array(repeating: HTMLEscapeTests.mediumStringNoReplacements, count: 20).joined(separator: "")
        measure {
            for _ in 0..<200_000 {
                _ = longString.htmlEscaped()
            }
        }
    }

    /// Measures escaping a long string that needs replacements.
    func testLongStringWithReplacements() {
        let longString = Array(repeating: HTMLEscapeTests.mediumStringWithReplacements, count: 20).joined(separator: "")
        measure {
            for _ in 0..<50_000 {
                _ = longString.htmlEscaped()
            }
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#if !canImport(ObjectiveC) | ||
import XCTest | ||
|
||
/// Test manifest for `HTMLEscapeTests`; this file is only compiled when `ObjectiveC` cannot be imported. | ||
extension HTMLEscapeTests { | ||
// DO NOT MODIFY: This is autogenerated, use: | ||
// `swift test --generate-linuxmain` | ||
// to regenerate. | ||
/// Pairs each test's name with a reference to its method; `__allTests()` passes this list to `testCase`. | ||
static let __allTests__HTMLEscapeTests = [ | ||
("testCorrectness", testCorrectness), | ||
("testLongStringNoReplacements", testLongStringNoReplacements), | ||
("testLongStringWithReplacements", testLongStringWithReplacements), | ||
("testMediumStringNoReplacements", testMediumStringNoReplacements), | ||
("testMediumStringWithOnlyReplacements", testMediumStringWithOnlyReplacements), | ||
("testMediumStringWithReplacements", testMediumStringWithReplacements), | ||
("testShortStringNoReplacements", testShortStringNoReplacements), | ||
("testShortStringWithReplacements", testShortStringWithReplacements), | ||
] | ||
} | ||
|
||
/// Test manifest for `TemplateDataEncoderTests`; this file is only compiled when `ObjectiveC` cannot be imported. | ||
extension TemplateDataEncoderTests { | ||
// DO NOT MODIFY: This is autogenerated, use: | ||
// `swift test --generate-linuxmain` | ||
// to regenerate. | ||
/// Pairs each test's name with a reference to its method; `__allTests()` passes this list to `testCase`. | ||
static let __allTests__TemplateDataEncoderTests = [ | ||
("testArray", testArray), | ||
("testComplexEncodable", testComplexEncodable), | ||
("testDictionary", testDictionary), | ||
("testDouble", testDouble), | ||
("testEncodable", testEncodable), | ||
("testEncodeSuperCustomImplementation", testEncodeSuperCustomImplementation), | ||
("testEncodeSuperCustomImplementationWithSuperEncoder1", testEncodeSuperCustomImplementationWithSuperEncoder1), | ||
("testEncodeSuperCustomImplementationWithSuperEncoder2", testEncodeSuperCustomImplementationWithSuperEncoder2), | ||
("testEncodeSuperDefaultImplementation", testEncodeSuperDefaultImplementation), | ||
("testEncodingPerformanceExampleModel", testEncodingPerformanceExampleModel), | ||
("testEncodingPerformanceExampleModelJSONBaseline", testEncodingPerformanceExampleModelJSONBaseline), | ||
("testGH10", testGH10), | ||
("testGH20", testGH20), | ||
("testNestedArray", testNestedArray), | ||
("testNestedDictionary", testNestedDictionary), | ||
("testNestedEncodable", testNestedEncodable), | ||
("testString", testString), | ||
("testTemplabeByteScannerPeak", testTemplabeByteScannerPeak), | ||
] | ||
} | ||
|
||
/// Collects the test case entries of every test class in this module.
///
/// - Returns: One `XCTestCaseEntry` per test class, in declaration order:
///   `HTMLEscapeTests` first, then `TemplateDataEncoderTests`.
public func __allTests() -> [XCTestCaseEntry] {
    var entries = [XCTestCaseEntry]()
    entries.append(testCase(HTMLEscapeTests.__allTests__HTMLEscapeTests))
    entries.append(testCase(TemplateDataEncoderTests.__allTests__TemplateDataEncoderTests))
    return entries
}
#endif |