Skip to content

Commit

Permalink
Fix a variety of character encoding issues (#4698)
Browse files Browse the repository at this point in the history
* add e2e test that demonstrates encoding issue

* fix all sorts of content-type wackiness, infer content-type from html

* update snapshot

* add kr, jp, cn tests

* update snapshot

* intercept any valid JS content-type

* PR review changes
  • Loading branch information
flotwig authored and brian-mann committed Jul 15, 2019
1 parent f518e6e commit 84f99e6
Show file tree
Hide file tree
Showing 11 changed files with 332 additions and 3 deletions.
79 changes: 79 additions & 0 deletions packages/server/__snapshots__/1_interception_spec.coffee.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
exports['e2e interception spec character encodings does not mangle non-UTF-8 text 1'] = `
====================================================================================================
(Run Starting)
┌────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Cypress: 1.2.3 │
│ Browser: FooBrowser 88 │
│ Specs: 1 found (character_encoding_spec.js) │
│ Searched: cypress/integration/character_encoding_spec.js │
└────────────────────────────────────────────────────────────────────────────────────────────────┘
────────────────────────────────────────────────────────────────────────────────────────────────────
Running: character_encoding_spec.js... (1 of 1)
character encoding tests
without gzip
✓ iso-8859-1 works
✓ euc-kr works
✓ shift-jis works
✓ gb2312 works
with gzip
✓ iso-8859-1 works
✓ euc-kr works
✓ shift-jis works
✓ gb2312 works
without gzip (no content-type charset)
✓ iso-8859-1 works
✓ euc-kr works
✓ shift-jis works
✓ gb2312 works
with gzip (no content-type charset)
✓ iso-8859-1 works
✓ euc-kr works
✓ shift-jis works
✓ gb2312 works
16 passing
(Results)
┌──────────────────────────────────────────┐
│ Tests: 16 │
│ Passing: 16 │
│ Failing: 0 │
│ Pending: 0 │
│ Skipped: 0 │
│ Screenshots: 0 │
│ Video: true │
│ Duration: X seconds │
│ Spec Ran: character_encoding_spec.js │
└──────────────────────────────────────────┘
(Video)
- Started processing: Compressing to 32 CRF
- Finished processing: /foo/bar/.projects/e2e/cypress/videos/abc123.mp4 (X seconds)
====================================================================================================
(Run Finished)
Spec Tests Passing Failing Pending Skipped
┌────────────────────────────────────────────────────────────────────────────────────────────────┐
│ ✔ character_encoding_spec.js XX:XX 16 16 - - - │
└────────────────────────────────────────────────────────────────────────────────────────────────┘
All specs passed! XX:XX 16 16 - - -
`
38 changes: 36 additions & 2 deletions packages/server/lib/controllers/proxy.coffee
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
_ = require("lodash")
zlib = require("zlib")
charset = require("charset")
concat = require("concat-stream")
iconv = require("iconv-lite")
Promise = require("bluebird")
accept = require("http-accept")
debug = require("debug")("cypress:server:proxy")
Expand All @@ -22,6 +24,18 @@ zlibOptions = {
finishFlush: zlib.Z_SYNC_FLUSH
}

## https://github.com/cypress-io/cypress/issues/1543
getNodeCharsetFromResponse = (headers, body) ->
httpCharset = (charset(headers, body, 1024) || '').toLowerCase()

debug("inferred charset from response %o", { httpCharset })

if iconv.encodingExists(httpCharset)
return httpCharset

## browsers default to latin1
return "latin1"

isGzipError = (err) ->
Object.prototype.hasOwnProperty.call(zlib.constants, err.code)

Expand Down Expand Up @@ -119,6 +133,13 @@ module.exports = {
## make sure the response includes string type
contentType and contentType.includes(str)

resContentTypeIsJavaScript = (respHeaders) ->
_.some [
'application/javascript',
'application/x-javascript',
'text/javascript'
].map(_.partial(resContentTypeIs, respHeaders))

reqAcceptsHtml = ->
## don't inject if this is an XHR from jquery
return if req.headers["x-requested-with"]
Expand Down Expand Up @@ -176,7 +197,10 @@ module.exports = {
## bypass the stream buffer and pipe this back
if wantsInjection
rewrite = (body) ->
rewriter.html(body.toString("utf8"), remoteState.domainName, wantsInjection, wantsSecurityRemoved)
## transparently decode their body to a node string and then re-encode
nodeCharset = getNodeCharsetFromResponse(headers, body)
body = rewriter.html(iconv.decode(body, nodeCharset), remoteState.domainName, wantsInjection, wantsSecurityRemoved)
iconv.encode(body, nodeCharset)

## TODO: we can probably move this to the new
## replacestream rewriter instead of using
Expand Down Expand Up @@ -247,6 +271,16 @@ module.exports = {
onResponse = (str, incomingRes) =>
{headers, statusCode} = incomingRes

originalSetHeader = res.setHeader

## express does all kinds of silly/nasty stuff to the content-type...
## but we don't want to change it at all!
res.setHeader = (k, v) ->
if k == 'content-type'
v = incomingRes.headers['content-type']

originalSetHeader.call(res, k, v)

wantsInjection ?= do ->
return false if not resContentTypeIs(headers, "text/html")

Expand All @@ -263,7 +297,7 @@ module.exports = {
## on the response or its a request for any javascript script tag
config.modifyObstructiveCode and (
(wantsInjection is "full") or
resContentTypeIs(headers, "application/javascript")
resContentTypeIsJavaScript(headers)
)

@setResHeaders(req, res, incomingRes, wantsInjection)
Expand Down
2 changes: 2 additions & 0 deletions packages/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"browserify": "13.3.0",
"chai": "1.10.0",
"chalk": "2.4.2",
"charset": "1.0.1",
"check-more-types": "2.24.0",
"chokidar": "3.0.1",
"cjsxify": "0.3.0",
Expand Down Expand Up @@ -87,6 +88,7 @@
"http-proxy": "1.17.0",
"http-status-codes": "1.3.2",
"human-interval": "0.1.6",
"iconv-lite": "0.5.0",
"image-size": "0.7.4",
"is-fork-pr": "2.3.0",
"is-html": "2.0.0",
Expand Down
63 changes: 63 additions & 0 deletions packages/server/test/e2e/1_interception_spec.coffee
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
compression = require("compression")
e2e = require("../support/helpers/e2e")
Fixtures = require("../support/helpers/fixtures")
path = require("path")

PORT = 9876

## based off of the most common encodings used on the Internet:
## https://w3techs.com/technologies/overview/character_encoding/all
## as of this writing, these tests will cover ~99% of websites
TEST_ENCODINGS = [
"iso-8859-1"
"euc-kr"
"shift-jis"
"gb2312"
]

e2ePath = Fixtures.projectPath("e2e")

fullController = (charset) ->
(req, res) ->
res.set({ 'content-type': "text/html;charset=#{charset}" });
res.sendFile(path.join(e2ePath, "static/charsets/#{charset}.html"))

pageOnlyController = (charset) ->
(req, res) ->
res.set()
res.sendFile(path.join(e2ePath, "static/charsets/#{charset}.html"), {
headers: { 'content-type': "text/html" }
})

describe "e2e interception spec", ->
e2e.setup
servers: [
{
onServer: (app) ->
TEST_ENCODINGS.forEach (enc) ->
app.get "/#{enc}.html", fullController(enc)

app.use "/#{enc}.html.gz", compression()
app.get "/#{enc}.html.gz", fullController(enc)

app.get "/#{enc}.html.pageonly", pageOnlyController(enc)

app.use "/#{enc}.html.gz.pageonly", compression()
app.get "/#{enc}.html.gz.pageonly", pageOnlyController(enc)

port: PORT
}
]

context "character encodings", ->
## https://github.com/cypress-io/cypress/issues/1543
it "does not mangle non-UTF-8 text", ->
e2e.exec(@, {
spec: "character_encoding_spec.js"
config: {
defaultCommandTimeout: 100
baseUrl: "http://localhost:9876"
}
snapshot: true
expectedExitCode: 0
})
1 change: 0 additions & 1 deletion packages/server/test/e2e/6_visit_spec.coffee
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
useragent = require("express-useragent")
Fixtures = require("../support/helpers/fixtures")
e2e = require("../support/helpers/e2e")

onServer = (app) ->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
describe('character encoding tests', () => {
[
{
title: 'without gzip',
extension: '.html',
},
{
title: 'with gzip',
extension: '.html.gz',
},
{
title: 'without gzip (no content-type charset)',
extension: '.html.pageonly',
},
{
title: 'with gzip (no content-type charset)',
extension: '.html.gz.pageonly',
},
].forEach(({ title, extension }) => {
context(title, () => {
it('iso-8859-1 works', () => {
cy.visit(`/iso-8859-1${extension}`)
cy.get('#t1').should('have.html', 'Olá Mundo')
cy.get('#t2').should('have.html', 'Ç')
cy.get('#t3').should('have.html', 'Pêssego')
})

it('euc-kr works', () => {
cy.visit(`/euc-kr${extension}`)
cy.get('.text').should('contain.html', '서울 남산케이블카 운행')
})

it('shift-jis works', () => {
cy.visit(`/shift-jis${extension}`)
cy.get('body').should('contain.html', '総合サポート・お問い合わせ')
})

it('gb2312 works', () => {
cy.visit(`/gb2312${extension}`)
cy.get('h3').should('contain.html', '雨果主题展8月启幕')
})
})
})
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!DOCTYPE html>
<html data-useragent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0" lang="ko"><head>
<meta http-equiv="content-type" content="text/html; charset=EUC-KR">
<meta charset="euc-kr">

<title>네이버 뉴스</title>
</head>
<body>
<div class="text">
서울 남산케이블카 운행 중 안전펜스 부딪혀…7명 부상(종합)
</div>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="X-UA-Compatible" content="IE=EmulateIE7">
<meta http-equiv="Content-Type" content="text/html; charset=GBK">
<title>东方网</title>
</head>
<body>
<h3>雨果主题展8月启幕</h3>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>


<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1" />



<title> Cypress ISO 8859-1 Characters Test</title>



</head>

<body class="bodyLogon">

<h1>
Cypress ISO 8859-1 Characters Test
</h1>
<ul>

<li id='t1'>Olá Mundo</li>
<li id='t2'>Ç</li>
<li id='t3'>Pêssego</li>
<li id='t4'>Pão</li>
<li id='t5'>Gestão</li>
<li id='t6'>Distribuição</li>

</ul>




</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="ja">
<head>
<meta http-equiv="content-type" content="text/html; charset=Shift_JIS">
<meta charset="shift_jis">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>ソニー製品情報・ソニーストア - ソニー</title>
</head>
<body style="overflow: visible;">
総合サポート・お問い合わせ
</body>
</html>
Loading

4 comments on commit 84f99e6

@cypress-bot
Copy link
Contributor

@cypress-bot cypress-bot bot commented on 84f99e6 Jul 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Circle has built the linux x64 version of the Test Runner.

You can install this pre-release platform-specific build using instructions at https://on.cypress.io/installing-cypress#Install-pre-release-version.

You will need to use custom CYPRESS_INSTALL_BINARY url and install Cypress using an url instead of the version.

export CYPRESS_INSTALL_BINARY=https://cdn.cypress.io/beta/binary/3.4.1/linux-x64/circle-develop-84f99e6e8752e5e422e298b0e894930c275023ee-133976/cypress.zip
npm install https://cdn.cypress.io/beta/npm/3.4.1/circle-develop-84f99e6e8752e5e422e298b0e894930c275023ee-133963/cypress.tgz

@cypress-bot
Copy link
Contributor

@cypress-bot cypress-bot bot commented on 84f99e6 Jul 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AppVeyor has built the win32 ia32 version of the Test Runner.

You can install this pre-release platform-specific build using instructions at https://on.cypress.io/installing-cypress#Install-pre-release-version.

You will need to use custom CYPRESS_INSTALL_BINARY url and install Cypress using an url instead of the version.

set CYPRESS_INSTALL_BINARY=https://cdn.cypress.io/beta/binary/3.4.1/win32-ia32/appveyor-develop-84f99e6e8752e5e422e298b0e894930c275023ee-25991836/cypress.zip
npm install https://cdn.cypress.io/beta/binary/3.4.1/win32-ia32/appveyor-develop-84f99e6e8752e5e422e298b0e894930c275023ee-25991836/cypress.zip

@cypress-bot
Copy link
Contributor

@cypress-bot cypress-bot bot commented on 84f99e6 Jul 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AppVeyor has built the win32 x64 version of the Test Runner.

You can install this pre-release platform-specific build using instructions at https://on.cypress.io/installing-cypress#Install-pre-release-version.

You will need to use custom CYPRESS_INSTALL_BINARY url and install Cypress using an url instead of the version.

set CYPRESS_INSTALL_BINARY=https://cdn.cypress.io/beta/binary/3.4.1/win32-x64/appveyor-develop-84f99e6e8752e5e422e298b0e894930c275023ee-25991836/cypress.zip
npm install https://cdn.cypress.io/beta/binary/3.4.1/win32-x64/appveyor-develop-84f99e6e8752e5e422e298b0e894930c275023ee-25991836/cypress.zip

@cypress-bot
Copy link
Contributor

@cypress-bot cypress-bot bot commented on 84f99e6 Jul 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Circle has built the darwin x64 version of the Test Runner.

You can install this pre-release platform-specific build using instructions at https://on.cypress.io/installing-cypress#Install-pre-release-version.

You will need to use custom CYPRESS_INSTALL_BINARY url and install Cypress using an url instead of the version.

export CYPRESS_INSTALL_BINARY=https://cdn.cypress.io/beta/binary/3.4.1/darwin-x64/circle-develop-84f99e6e8752e5e422e298b0e894930c275023ee-133985/cypress.zip
npm install https://cdn.cypress.io/beta/npm/3.4.1/circle-develop-84f99e6e8752e5e422e298b0e894930c275023ee-133984/cypress.tgz

Please sign in to comment.