microsoft · rchiodo · Sep 22, 2020 · Sep 22, 2020 · Sep 22, 2020 · Sep 22, 2020
diff --git a/news/2 Fixes/5678.md b/news/2 Fixes/5678.md
@@ -0,0 +1 @@
+Fix escaping of output to encode HTML chars correctly.
diff --git a/src/client/datascience/jupyter/jupyterNotebook.ts b/src/client/datascience/jupyter/jupyterNotebook.ts
@@ -40,6 +40,10 @@ import { KernelConnectionMetadata } from './kernels/types';
 
 // tslint:disable-next-line: no-require-imports
 import cloneDeep = require('lodash/cloneDeep');
+// tslint:disable-next-line: no-require-imports
+import escape = require('lodash/escape');
+// tslint:disable-next-line: no-require-imports
+import unescape = require('lodash/unescape');
 import { concatMultilineString, formatStreamText } from '../../../datascience-ui/common';
 import { RefBool } from '../../common/refBool';
 import { PythonEnvironment } from '../../pythonEnvironments/info';
@@ -783,12 +787,12 @@ export class JupyterNotebookBase implements INotebook {
                 outputs.forEach((o) => {
                     if (o.output_type === 'stream') {
                         const stream = o as nbformat.IStream;
-                        result = result.concat(formatStreamText(concatMultilineString(stream.text, true)));
+                        result = result.concat(formatStreamText(unescape(concatMultilineString(stream.text, true))));
                     } else {
                         const data = o.data;
                         if (data && data.hasOwnProperty('text/plain')) {
                             // tslint:disable-next-line:no-any
-                            result = result.concat((data as any)['text/plain']);
+                            result = result.concat(unescape((data as any)['text/plain']));
                         }
                     }
                 });
@@ -1233,7 +1237,7 @@ export class JupyterNotebookBase implements INotebook {
     ) {
         // Check our length on text output
         if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
-            msg.content.data['text/plain'] = trimFunc(msg.content.data['text/plain'] as string);
+            msg.content.data['text/plain'] = escape(trimFunc(msg.content.data['text/plain'] as string));
         }
 
         this.addToCellData(
@@ -1262,7 +1266,7 @@ export class JupyterNotebookBase implements INotebook {
                 if (o.data && o.data.hasOwnProperty('text/plain')) {
                     // tslint:disable-next-line: no-any
                     const str = (o.data as any)['text/plain'].toString();
-                    const data = trimFunc(str) as string;
+                    const data = escape(trimFunc(str)) as string;
                     this.addToCellData(
                         cell,
                         {
@@ -1310,13 +1314,13 @@ export class JupyterNotebookBase implements INotebook {
                 : undefined;
         if (existing) {
             // tslint:disable-next-line:restrict-plus-operands
-            existing.text = existing.text + msg.content.text;
+            existing.text = existing.text + escape(msg.content.text);
             const originalText = formatStreamText(concatMultilineString(existing.text));
             originalTextLength = originalText.length;
             existing.text = trimFunc(originalText);
             trimmedTextLength = existing.text.length;
         } else {
-            const originalText = formatStreamText(concatMultilineString(msg.content.text));
+            const originalText = formatStreamText(concatMultilineString(escape(msg.content.text)));
             originalTextLength = originalText.length;
             // Create a new stream entry
             const output: nbformat.IStream = {
@@ -1346,6 +1350,11 @@ export class JupyterNotebookBase implements INotebook {
     }
 
     private handleDisplayData(msg: KernelMessage.IDisplayDataMsg, clearState: RefBool, cell: ICell) {
+        // Escape text output
+        if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
+            msg.content.data['text/plain'] = escape(msg.content.data['text/plain'] as string);
+        }
+
         const output: nbformat.IDisplayData = {
             output_type: 'display_data',
             data: msg.content.data,
@@ -1393,9 +1402,9 @@ export class JupyterNotebookBase implements INotebook {
     private handleError(msg: KernelMessage.IErrorMsg, clearState: RefBool, cell: ICell) {
         const output: nbformat.IError = {
             output_type: 'error',
-            ename: msg.content.ename,
-            evalue: msg.content.evalue,
-            traceback: msg.content.traceback
+            ename: escape(msg.content.ename),
+            evalue: escape(msg.content.evalue),
+            traceback: msg.content.traceback.map(escape)
         };
         this.addToCellData(cell, output, clearState);
         cell.state = CellState.error;

diff --git a/src/client/datascience/jupyter/kernelVariables.ts b/src/client/datascience/jupyter/kernelVariables.ts
@@ -6,6 +6,8 @@ import { inject, injectable } from 'inversify';
 import stripAnsi from 'strip-ansi';
 import * as uuid from 'uuid/v4';
 
+// tslint:disable-next-line: no-require-imports
+import unescape = require('lodash/unescape');
 import { CancellationToken, Event, EventEmitter, Uri } from 'vscode';
 import { PYTHON_LANGUAGE } from '../../common/constants';
 import { traceError } from '../../common/logger';
@@ -246,7 +248,7 @@ export class KernelVariables implements IJupyterVariables {
 
     // Pull our text result out of the Jupyter cell
     private deserializeJupyterResult<T>(cells: ICell[]): T {
-        const text = this.extractJupyterResultText(cells);
+        const text = unescape(this.extractJupyterResultText(cells));
         return JSON.parse(text) as T;
     }
 
@@ -371,7 +373,7 @@ export class KernelVariables implements IJupyterVariables {
         // Now execute the query
         if (notebook && query) {
             const cells = await notebook.execute(query.query, Identifiers.EmptyFileName, 0, uuid(), token, true);
-            const text = this.extractJupyterResultText(cells);
+            const text = unescape(this.extractJupyterResultText(cells));
 
             // Apply the expression to it
             const matches = this.getAllMatches(query.parser, text);

diff --git a/src/client/datascience/jupyter/kernels/cellExecution.ts b/src/client/datascience/jupyter/kernels/cellExecution.ts
@@ -35,6 +35,8 @@ import {
 import { IKernel } from './types';
 // tslint:disable-next-line: no-var-requires no-require-imports
 const vscodeNotebookEnums = require('vscode') as typeof import('vscode-proposed');
+// tslint:disable-next-line: no-require-imports
+import escape = require('lodash/escape');
 
 export class CellExecutionFactory {
     constructor(
@@ -406,6 +408,11 @@ export class CellExecution {
     // See this for docs on the messages:
     // https://jupyter-client.readthedocs.io/en/latest/messaging.html#messaging-in-jupyter
     private handleExecuteResult(msg: KernelMessage.IExecuteResultMsg, clearState: RefBool) {
+        // Escape text output
+        if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
+            msg.content.data['text/plain'] = escape(msg.content.data['text/plain'] as string);
+        }
+
         this.addToCellData(
             {
                 output_type: 'execute_result',
@@ -429,7 +436,7 @@ export class CellExecution {
                             // Mark as stream output so the text is formatted because it likely has ansi codes in it.
                             output_type: 'stream',
                             // tslint:disable-next-line: no-any
-                            text: (o.data as any)['text/plain'].toString(),
+                            text: escape((o.data as any)['text/plain'].toString()),
                             metadata: {},
                             execution_count: reply.execution_count
                         },
@@ -463,10 +470,10 @@ export class CellExecution {
             lastOutput && lastOutput.outputKind === CellOutputKind.Text ? lastOutput : undefined;
         if (existing) {
             // tslint:disable-next-line:restrict-plus-operands
-            existing.text = formatStreamText(concatMultilineString(existing.text + msg.content.text));
+            existing.text = formatStreamText(concatMultilineString(existing.text + escape(msg.content.text)));
             this.cell.outputs = [...this.cell.outputs]; // This is necessary to get VS code to update (for now)
         } else {
-            const originalText = formatStreamText(concatMultilineString(msg.content.text));
+            const originalText = formatStreamText(concatMultilineString(escape(msg.content.text)));
             // Create a new stream entry
             const output: nbformat.IStream = {
                 output_type: 'stream',
@@ -478,6 +485,11 @@ export class CellExecution {
     }
 
     private handleDisplayData(msg: KernelMessage.IDisplayDataMsg, clearState: RefBool) {
+        // Escape text output
+        if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
+            msg.content.data['text/plain'] = escape(msg.content.data['text/plain'] as string);
+        }
+
         const output: nbformat.IDisplayData = {
             output_type: 'display_data',
             data: msg.content.data,

diff --git a/src/client/datascience/jupyter/oldJupyterVariables.ts b/src/client/datascience/jupyter/oldJupyterVariables.ts
@@ -11,6 +11,8 @@ import { Event, EventEmitter, Uri } from 'vscode';
 import { PYTHON_LANGUAGE } from '../../common/constants';
 import { traceError } from '../../common/logger';
 
+// tslint:disable-next-line: no-require-imports
+import unescape = require('lodash/unescape');
 import { IConfigurationService } from '../../common/types';
 import * as localize from '../../common/utils/localize';
 import { EXTENSION_ROOT_DIR } from '../../constants';
@@ -232,7 +234,7 @@ export class OldJupyterVariables implements IJupyterVariables {
 
     // Pull our text result out of the Jupyter cell
     private deserializeJupyterResult<T>(cells: ICell[]): T {
-        const text = this.extractJupyterResultText(cells);
+        const text = unescape(this.extractJupyterResultText(cells));
         return JSON.parse(text) as T;
     }
 
@@ -357,7 +359,7 @@ export class OldJupyterVariables implements IJupyterVariables {
         // Now execute the query
         if (notebook && query) {
             const cells = await notebook.execute(query.query, Identifiers.EmptyFileName, 0, uuid(), undefined, true);
-            const text = this.extractJupyterResultText(cells);
+            const text = unescape(this.extractJupyterResultText(cells));
 
             // Apply the expression to it
             const matches = this.getAllMatches(query.parser, text);

diff --git a/src/datascience-ui/interactive-common/cellOutput.tsx b/src/datascience-ui/interactive-common/cellOutput.tsx
@@ -314,26 +314,34 @@ export class CellOutput extends React.Component<ICellOutputProps> {
             input = JSON.stringify(output.data);
             renderWithScrollbars = true;
             isText = true;
+        } else if (output.output_type === 'execute_result' && input && input.hasOwnProperty('text/plain')) {
+            // Plain text should actually be shown as html so that escaped HTML shows up correctly
+            mimeType = 'text/html';
+            isText = true;
+            isError = false;
+            renderWithScrollbars = true;
+            // tslint:disable-next-line: no-any
+            const text = (input as any)['text/plain'];
+            input = {
+                'text/html': text // XML tags should have already been escaped.
+            };
         } else if (output.output_type === 'stream') {
-            // Stream output needs to be wrapped in xmp so it
-            // show literally. Otherwise < chars start a new html element.
             mimeType = 'text/html';
             isText = true;
             isError = false;
             renderWithScrollbars = true;
             // Sonar is wrong, TS won't compile without this AS
             const stream = output as nbformat.IStream; // NOSONAR
-            const formatted = concatMultilineString(stream.text);
+            const concatted = concatMultilineString(stream.text);
             input = {
-                'text/html': formatted.includes('<') ? `<xmp>${formatted}</xmp>` : `<div>${formatted}</div>`
+                'text/html': concatted // XML tags should have already been escaped.
             };
 
-            // Output may have goofy ascii colorization chars in it. Try
-            // colorizing if we don't have html that needs <xmp> around it (ex. <type ='string'>)
+            // Output may have ascii colorization chars in it.
             try {
-                if (ansiRegex().test(formatted)) {
+                if (ansiRegex().test(concatted)) {
                     const converter = new CellOutput.ansiToHtmlClass(CellOutput.getAnsiToHtmlOptions());
-                    const html = converter.toHtml(formatted);
+                    const html = converter.toHtml(concatted);
                     input = {
                         'text/html': html
                     };

diff --git a/src/test/datascience/Untitled-1.ipynb b/src/test/datascience/Untitled-1.ipynb
@@ -0,0 +1,47 @@
+{
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6-final"
+  },
+  "orig_nbformat": 2
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": "1"
+     },
+     "metadata": {},
+     "execution_count": 1
+    }
+   ],
+   "source": [
+    "a=1\n",
+    "a"
+   ]
+  }
+ ]
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix escaping of output to encode HTML chars correctly.