diff --git a/package-lock.json b/package-lock.json index fba4180ed..46c23cc1a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,6 +18,7 @@ "@octokit/rest": "^18.12.0", "@opengovsg/formsg-sdk": "^0.11.0", "@opengovsg/sgid-client": "^2.0.0", + "@types/dompurify": "^3.0.5", "auto-bind": "^4.0.0", "aws-lambda": "^1.0.7", "aws-sdk": "^2.1428.0", @@ -37,6 +38,7 @@ "crypto-js": "^4.2.0", "dd-trace": "^4.7.0", "debug": "~2.6.9", + "dompurify": "^3.0.9", "dotenv": "^16.3.1", "eventsource": "^2.0.2", "exponential-backoff": "^3.1.0", @@ -4456,9 +4458,9 @@ } }, "node_modules/@types/dompurify": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/@types/dompurify/-/dompurify-2.4.0.tgz", - "integrity": "sha512-IDBwO5IZhrKvHFUl+clZxgf3hn2b/lU6H1KaBShPkQyGJUQ0xwebezIPSuiyGwfz1UzJWQl4M7BDxtHtCCPlTg==", + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/@types/dompurify/-/dompurify-3.0.5.tgz", + "integrity": "sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==", "dependencies": { "@types/trusted-types": "*" } @@ -7056,6 +7058,11 @@ "node": ">=8" } }, + "node_modules/dompurify": { + "version": "3.0.9", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.0.9.tgz", + "integrity": "sha512-uyb4NDIvQ3hRn6NiC+SIFaP4mJ/MdXlvtunaqK9Bn6dD3RuB/1S/gasEjDHD8eiaqdSael2vBv+hOs7Y+jhYOQ==" + }, "node_modules/dotenv": { "version": "16.3.1", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.3.1.tgz", @@ -9731,6 +9738,14 @@ "node": ">= 10" } }, + "node_modules/isomorphic-dompurify/node_modules/@types/dompurify": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/@types/dompurify/-/dompurify-2.4.0.tgz", + "integrity": "sha512-IDBwO5IZhrKvHFUl+clZxgf3hn2b/lU6H1KaBShPkQyGJUQ0xwebezIPSuiyGwfz1UzJWQl4M7BDxtHtCCPlTg==", + "dependencies": { + "@types/trusted-types": "*" + } + }, "node_modules/isomorphic-dompurify/node_modules/acorn": { "version": "8.10.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.10.0.tgz", diff --git a/package.json b/package.json index 828791cf2..7451cdc3b 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "@octokit/rest": "^18.12.0", "@opengovsg/formsg-sdk": "^0.11.0", "@opengovsg/sgid-client": "^2.0.0", + "@types/dompurify": "^3.0.5", "auto-bind": "^4.0.0", "aws-lambda": "^1.0.7", "aws-sdk": "^2.1428.0", @@ -53,6 +54,7 @@ "crypto-js": "^4.2.0", "dd-trace": "^4.7.0", "debug": "~2.6.9", + "dompurify": "^3.0.9", "dotenv": "^16.3.1", "eventsource": "^2.0.2", "exponential-backoff": "^3.1.0", diff --git a/src/services/fileServices/MdPageServices/MediaFileService.js b/src/services/fileServices/MdPageServices/MediaFileService.js index 5a401c093..4e2d1742c 100644 --- a/src/services/fileServices/MdPageServices/MediaFileService.js +++ b/src/services/fileServices/MdPageServices/MediaFileService.js @@ -41,18 +41,31 @@ class MediaFileService { throw new BadRequestError("File did not pass virus scan") } } + // Sanitize and validate file - const sanitizedContent = await validateAndSanitizeFileUpload(content) - if (!sanitizedContent) { + const sanitisationResult = await validateAndSanitizeFileUpload(content) + if (!sanitisationResult) { throw new MediaTypeError(`File extension is not within the approved list`) } + + const { + content: sanitizedContent, + detectedFileType: { ext }, + } = sanitisationResult + // NOTE: We construct the extension based off what we detect as the file type + const constructedFileName = `${fileName + .split(".") + .slice(0, -1) + .join(".")}.${ext}` + const { sha } = await this.repoService.create(sessionData, { content: sanitizedContent, - fileName, + fileName: constructedFileName, directoryName, isMedia: true, }) - return { name: fileName, content, sha } + + return { name: constructedFileName, content, sha } } async read(sessionData, { fileName, directoryName }) { @@ -64,10 +77,17 @@ class MediaFileService { async update(sessionData, { fileName, directoryName, content, sha }) { this.mediaNameChecks({ directoryName, fileName }) - const sanitizedContent = await validateAndSanitizeFileUpload(content) - if (!sanitizedContent) { + const sanitisationResult = await validateAndSanitizeFileUpload(content) + if (!sanitisationResult) { throw new MediaTypeError(`File extension is not within the approved list`) } + const { + content: sanitizedContent, + detectedFileType: { ext }, + } = sanitisationResult + + // NOTE: We can trust the user input here + // as we are removing stuff from our system. await this.repoService.delete(sessionData, { sha, fileName, @@ -75,7 +95,7 @@ class MediaFileService { }) const { sha: newSha } = await this.repoService.create(sessionData, { content: sanitizedContent, - fileName, + fileName: `${fileName.split(".").slice(0, -1).join(".")}.${ext}`, directoryName, isMedia: true, }) diff --git a/src/services/fileServices/MdPageServices/__tests__/MediaFileService.spec.js b/src/services/fileServices/MdPageServices/__tests__/MediaFileService.spec.js index 6319ba6cc..df4cb9175 100644 --- a/src/services/fileServices/MdPageServices/__tests__/MediaFileService.spec.js +++ b/src/services/fileServices/MdPageServices/__tests__/MediaFileService.spec.js @@ -2,6 +2,10 @@ const { config } = require("@config/config") const { BadRequestError } = require("@errors/BadRequestError") +const { + MediaFileService, +} = require("@services/fileServices/MdPageServices/MediaFileService") + const GITHUB_ORG_NAME = config.get("github.orgName") describe("Media File Service", () => { @@ -9,13 +13,14 @@ describe("Media File Service", () => { const accessToken = "test-token" const imageName = "test image.png" const imageEncodedName = encodeURIComponent(imageName) - const fileName = "test file.pdf" + const fileName = "test file.jpg" const fileEncodedName = encodeURIComponent(fileName) const directoryName = "images/subfolder" - const mockContent = "schema, test" - const mockSanitizedContent = "sanitized-test" const sha = "12345" const mockGithubSessionData = "githubData" + const mockContent = + "" + const mockSanitizedContent = mockContent.split(",")[1] const sessionData = { siteName, accessToken } @@ -34,22 +39,16 @@ describe("Media File Service", () => { updateRepoState: jest.fn(), } + // NOTE: Mock just the scan function + // as we want to omit network calls. jest.mock("@utils/file-upload-utils", () => ({ - validateAndSanitizeFileUpload: jest - .fn() - .mockReturnValue(mockSanitizedContent), - ALLOWED_FILE_EXTENSIONS: ["pdf"], + ...jest.requireActual("@utils/file-upload-utils"), scanFileForVirus: jest.fn().mockReturnValue({ CleanResult: true }), })) - const { - MediaFileService, - } = require("@services/fileServices/MdPageServices/MediaFileService") - const service = new MediaFileService({ repoService: mockRepoService, }) - const { validateAndSanitizeFileUpload } = require("@utils/file-upload-utils") beforeEach(() => { jest.clearAllMocks() @@ -66,15 +65,16 @@ describe("Media File Service", () => { ).rejects.toThrowError(BadRequestError) }) - mockRepoService.create.mockResolvedValueOnce({ sha }) it("Creating pages works correctly", async () => { - await expect( - service.create(sessionData, { - fileName, - directoryName, - content: mockContent, - }) - ).resolves.toMatchObject({ + // Arrange + mockRepoService.create.mockResolvedValueOnce({ sha }) + + const result = await service.create(sessionData, { + fileName, + directoryName, + content: mockContent, + }) + expect(result).toMatchObject({ name: fileName, content: mockContent, sha, @@ -85,7 +85,24 @@ describe("Media File Service", () => { directoryName, isMedia: true, }) - expect(validateAndSanitizeFileUpload).toHaveBeenCalledWith(mockContent) + }) + + it("should ignore the extension provided by the user", async () => { + // Arrange + mockRepoService.create.mockResolvedValueOnce({ sha }) + const fileNameWithWrongExt = "wrong.html" + + // Act + const result = await service.create(sessionData, { + fileName: fileNameWithWrongExt, + directoryName, + content: mockContent, + }) + + // Assert + // NOTE: The original extension here is not used. + // Instead, we use the inferred extension. + expect(result.name).toBe("wrong.jpg") }) }) @@ -153,8 +170,8 @@ describe("Media File Service", () => { describe("Update", () => { const oldSha = "54321" - mockRepoService.create.mockResolvedValueOnce({ sha }) it("Updating media file content works correctly", async () => { + mockRepoService.create.mockResolvedValueOnce({ sha }) await expect( service.update(sessionData, { fileName, @@ -179,7 +196,6 @@ describe("Media File Service", () => { directoryName, isMedia: true, }) - expect(validateAndSanitizeFileUpload).toHaveBeenCalledWith(mockContent) }) }) @@ -197,7 +213,7 @@ describe("Media File Service", () => { }) describe("Rename", () => { - const oldFileName = "test old file.pdf" + const oldFileName = "test old file.jpg" it("rejects renaming to page names with special characters", async () => { await expect( diff --git a/src/services/utilServices/Sanitizer.ts b/src/services/utilServices/Sanitizer.ts index d3e744f80..4db87076d 100644 --- a/src/services/utilServices/Sanitizer.ts +++ b/src/services/utilServices/Sanitizer.ts @@ -1,7 +1,7 @@ import DOMPurify from "isomorphic-dompurify" DOMPurify.setConfig({ - ADD_TAGS: ["iframe", "#comment", "script"], + ADD_TAGS: ["iframe", "script"], ADD_ATTR: [ "allow", "allowfullscreen", diff --git a/src/utils/__tests__/file-upload-utils.spec.ts b/src/utils/__tests__/file-upload-utils.spec.ts new file mode 100644 index 000000000..754f6066b --- /dev/null +++ b/src/utils/__tests__/file-upload-utils.spec.ts @@ -0,0 +1,52 @@ +import { validateAndSanitizeFileUpload } from "../file-upload-utils" + +describe("file-upload-utils", () => { + it("should preserve the original file type for binary format files", async () => { + // Arrange + const content = + "" + const expectedExtension = "png" + + // Act + const result = await validateAndSanitizeFileUpload(content) + + // Assert + expect(result).toBeDefined() + expect(result?.detectedFileType.ext).toBe(expectedExtension) + }) + + it("should sanitize svgs with alerts", async () => { + // Arrange + // NOTE: This is a svg with an `alert` in html comments + const maliciousContent = `data:application/pdf;base64,PCEtLQphbGVydChkb2N1bWVudC5kb21haW4pCi8qCi0tPgo8c3ZnIGlkPSJzdmcyIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA5MDAgOTAwIiB2ZXJzaW9uPSIxLjEiPgo8L3N2Zz4KPCEtLSAqLyAKLS0+Cg==` + const expectedExtension = "svg" + + // Act + const result = await validateAndSanitizeFileUpload(maliciousContent) + const sanitisedContent = Buffer.from(result?.content, "base64").toString() + + // Assert + expect(result).toBeDefined() + expect(result?.detectedFileType.ext).toBe(expectedExtension) + expect(sanitisedContent).not.toContain("alert") + }) + + it("should sanitize svgs with scripts", async () => { + // Arrange + // NOTE: Our sanitizer for files/html allows script tag + // however, the svg sanitizer does not as it is stricter + // and to prevent xss attacks. + const maliciousContent = `data:image/png;base64,${Buffer.from( + `` + ).toString("base64")}` + const expectedExtension = "svg" + + // Act + const result = await validateAndSanitizeFileUpload(maliciousContent) + + // Assert + expect(result).toBeDefined() + expect(result?.detectedFileType.ext).toBe(expectedExtension) + expect(result).not.toContain("script") + }) +}) diff --git a/src/utils/__tests__/markdown-utils.spec.ts b/src/utils/__tests__/markdown-utils.spec.ts index 85101269c..9475ea8ad 100644 --- a/src/utils/__tests__/markdown-utils.spec.ts +++ b/src/utils/__tests__/markdown-utils.spec.ts @@ -103,8 +103,8 @@ describe("Sanitized markdown utils test", () => { ) }) - it("should inject a html comment tag when the string is empty", () => { - expect(sanitizer.sanitize("")).toBe(HTML_COMMENT_TAG) + it("should not perform sanitisation when the string is empty", () => { + expect(sanitizer.sanitize("")).toBe("") }) }) }) diff --git a/src/utils/file-upload-utils.js b/src/utils/file-upload-utils.js index e00291ded..d77170b85 100644 --- a/src/utils/file-upload-utils.js +++ b/src/utils/file-upload-utils.js @@ -1,12 +1,14 @@ const CloudmersiveVirusApiClient = require("cloudmersive-virus-api-client") +const DOMPurify = require("dompurify") const FileType = require("file-type") const isSvg = require("is-svg") +const { JSDOM } = require("jsdom") const { config } = require("@config/config") -const logger = require("@logger/logger").default +const { window } = new JSDOM("") -const { sanitizer } = require("@services/utilServices/Sanitizer") +const logger = require("@logger/logger").default const CLOUDMERSIVE_API_KEY = config.get("cloudmersiveKey") @@ -31,6 +33,12 @@ apikey.apiKey = CLOUDMERSIVE_API_KEY const apiInstance = new CloudmersiveVirusApiClient.ScanApi() +// NOTE: This is NOT the default sanitiser; +// instead, we are creaitng our own instance of DOMPurify +// so that we can make it stricter solely +// for fileuploads. +const sanitizer = DOMPurify(window) + const scanFileForVirus = (fileBuffer, timeout) => { if (timeout) { defaultCloudmersiveClient.timeout = timeout @@ -54,16 +62,28 @@ const validateAndSanitizeFileUpload = async (data) => { const [, content] = data.split(",") const fileBuffer = Buffer.from(content, "base64") const detectedFileType = await FileType.fromBuffer(fileBuffer) - + // NOTE: This check is required for svg files. + // This is because svg files are a string based data type + // and not binary based. + // Hence, `FileType` wouldn't be able to detect the correct + // file type for svg files. if (isSvg(fileBuffer)) { + // NOTE: `isSvg` checks only using the first element, + // which is insufficient to guarantee that this file is safe. + // We run it thru the sanitizer again to ensure that the output + // is safe. const sanitizedBuffer = sanitizer.sanitize(fileBuffer) - return Buffer.from(sanitizedBuffer, "utf8").toString("base64") + return { + content: Buffer.from(sanitizedBuffer, "utf8").toString("base64"), + detectedFileType: { ext: "svg" }, + } } + if ( detectedFileType && ALLOWED_FILE_EXTENSIONS.includes(detectedFileType.ext) ) { - return content + return { content, detectedFileType } } return undefined