Skip to content

Commit

Permalink
Code cleanup (flutter#96)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevmoo authored Apr 11, 2019
1 parent 38756a0 commit 2b88ac7
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 44 deletions.
40 changes: 0 additions & 40 deletions lib/src/char_encodings.dart

This file was deleted.

37 changes: 33 additions & 4 deletions lib/src/html_input_stream.dart
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import 'dart:collection';
import 'dart:convert' show ascii, utf8;

import 'package:source_span/source_span.dart';

import 'char_encodings.dart';
import 'constants.dart';
import 'encoding_parser.dart';
import 'utils.dart';
Expand Down Expand Up @@ -66,7 +66,7 @@ class HtmlInputStream {
this.sourceUrl])
: charEncodingName = codecName(encoding) {
if (source is String) {
_rawChars = toCodepoints(source);
_rawChars = source.runes.toList();
charEncodingName = 'utf-8';
charEncodingCertain = true;
} else if (source is List<int>) {
Expand All @@ -92,7 +92,7 @@ class HtmlInputStream {
_chars = <int>[];

if (_rawChars == null) {
_rawChars = decodeBytes(charEncodingName, _rawBytes);
_rawChars = _decodeBytes(charEncodingName, _rawBytes);
}

bool skipNewline = false;
Expand Down Expand Up @@ -177,7 +177,7 @@ class HtmlInputStream {
/// encoding otherwise return null.
String detectBOM() {
// Try detecting the BOM using bytes from the string
if (hasUtf8Bom(_rawBytes)) {
if (_hasUtf8Bom(_rawBytes)) {
return 'utf-8';
}
return null;
Expand Down Expand Up @@ -292,3 +292,32 @@ String codecName(String encoding) {
var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();
return encodings[canonicalName];
}

/// Returns true if [bytes] starts with a UTF-8 byte order mark (the three
/// bytes `0xEF 0xBB 0xBF`).
///
/// Since UTF-8 doesn't have byte order, "byte order mark" is somewhat of a
/// misnomer, but the marker is used in HTML to detect the UTF-8 encoding.
///
/// The check begins at [offset]. If [length] is given, only the window
/// `offset .. offset + length` is considered; otherwise the window runs to the
/// end of [bytes]. A window that extends past the end of [bytes] is clamped to
/// the available data, so a too-large [length] yields `false` rather than a
/// [RangeError].
bool _hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {
  var end = length != null ? offset + length : bytes.length;
  // Never look beyond the data that is actually present.
  if (end > bytes.length) end = bytes.length;
  return (offset + 3) <= end &&
      bytes[offset] == 0xEF &&
      bytes[offset + 1] == 0xBB &&
      bytes[offset + 2] == 0xBF;
}

/// Decodes [bytes] using the named [encoding] and returns the resulting
/// Unicode code points.
///
/// Only the `'ascii'` and `'utf-8'` encoding names are handled. Any other
/// name causes an [ArgumentError] to be thrown.
Iterable<int> _decodeBytes(String encoding, List<int> bytes) {
  if (encoding == 'ascii') {
    final text = ascii.decode(bytes);
    return text.runes;
  }

  if (encoding == 'utf-8') {
    // NOTE: To match the behavior of the other decode functions, the UTF-8 BOM
    // is consumed here. This is the default behavior of `utf8.decode`.
    final text = utf8.decode(bytes);
    return text.runes;
  }

  throw ArgumentError('Encoding $encoding not supported');
}

0 comments on commit 2b88ac7

Please sign in to comment.