Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(NODE-6355): respect utf8 validation options when iterating cursors #4214

Merged
merged 10 commits into from
Sep 4, 2024
26 changes: 23 additions & 3 deletions src/cmap/wire_protocol/on_demand/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
getInt32LE,
ObjectId,
parseToElementsToArray,
pluckBSONSerializeOptions,
Timestamp,
toUTF8
} from '../../../bson';
Expand Down Expand Up @@ -45,6 +46,13 @@ type CachedBSONElement = { element: BSONElement; value: any | undefined };

/** @internal */
export class OnDemandDocument {
/**
* @internal
*
* Used for testing purposes.
*/
private static BSON: typeof BSON = BSON;
nbbeeken marked this conversation as resolved.
Show resolved Hide resolved

/**
* Maps JS strings to elements and jsValues for speeding up subsequent lookups.
* - If `false` then name does not exist in the BSON document
Expand Down Expand Up @@ -330,11 +338,23 @@ export class OnDemandDocument {
* @param options - BSON deserialization options
*/
public toObject(options?: BSONSerializeOptions): Record<string, any> {
return BSON.deserialize(this.bson, {
...options,
const exactBSONOptions = {
...pluckBSONSerializeOptions(options ?? {}),
validation: this.parseBsonSerializationOptions(options),
index: this.offset,
allowObjectSmallerThanBufferSize: true
});
};
return OnDemandDocument.BSON.deserialize(this.bson, exactBSONOptions);
}

/**
 * Maps the `enableUtf8Validation` flag onto a BSON `validation` setting.
 *
 * @param options - object that may carry `enableUtf8Validation`
 * @returns `{ utf8: false }` only when the flag is explicitly `false`; in every other case
 * (`true`, `undefined`, or no options object at all) `{ utf8: { writeErrors: false } }`
 */
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
  utf8: { writeErrors: false } | false;
} {
  return options?.enableUtf8Validation === false
    ? { utf8: false }
    : { utf8: { writeErrors: false } };
}

/** Returns this document's bytes only */
Expand Down
19 changes: 0 additions & 19 deletions src/cmap/wire_protocol/responses.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
type Document,
Long,
parseToElementsToArray,
pluckBSONSerializeOptions,
type Timestamp
} from '../../bson';
import { MongoUnexpectedServerResponseError } from '../../error';
Expand Down Expand Up @@ -166,24 +165,6 @@ export class MongoDBResponse extends OnDemandDocument {
}
return this.clusterTime ?? null;
}

/**
 * Deserializes this response into a plain object.
 *
 * The options handed to the parent implementation are the result of
 * `pluckBSONSerializeOptions` plus a `validation` entry derived from
 * `enableUtf8Validation`.
 *
 * @param options - BSON deserialization options
 */
public override toObject(options?: BSONSerializeOptions): Record<string, any> {
  const pluckedOptions = pluckBSONSerializeOptions(options ?? {});
  const validation = this.parseBsonSerializationOptions(options);
  // Built as a separate variable (not an inline literal) so the extra `validation`
  // key is passed along with the plucked options.
  const deserializeOptions = { ...pluckedOptions, validation };
  return super.toObject(deserializeOptions);
}

/**
 * Converts the `enableUtf8Validation` flag into a BSON `validation` setting.
 *
 * @param options - object that may carry `enableUtf8Validation`
 * @returns an object whose `utf8` member is `false` when validation was explicitly
 * disabled, and `{ writeErrors: false }` otherwise
 */
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
  utf8: { writeErrors: false } | false;
} {
  // Only an explicit `false` counts as "disabled"; `undefined` keeps validation on.
  const utf8ValidationDisabled = options?.enableUtf8Validation === false;
  return { utf8: utf8ValidationDisabled ? false : { writeErrors: false } };
}
}

/** @internal */
Expand Down
178 changes: 178 additions & 0 deletions test/integration/node-specific/bson-options/utf8_validation.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import { expect } from 'chai';
import * as net from 'net';
import * as sinon from 'sinon';

import {
BSON,
BSONError,
type Collection,
type MongoClient,
MongoDBResponse,
MongoServerError,
Expand Down Expand Up @@ -153,3 +156,178 @@ describe('class MongoDBResponse', () => {
}
);
});

describe('utf8 validation with cursors', function () {
let client: MongoClient;
let collection: Collection;

/**
* Inserts a document with malformed utf8 bytes. This method spies on socket.write, and then waits
* for an OP_MSG payload corresponding to `collection.insertOne({ field: 'é' })`, and then modifies the
* bytes of the character 'é', to produce invalid utf8.
*/
async function insertDocumentWithInvalidUTF8() {
  // Hex form of the UTF-8 encoding of 'é' (bytes c3 a9). Computed once instead of on
  // every socket write, and reused below so the search and the replacement cannot drift.
  const targetBytes = Buffer.from('é').toString('hex');

  const stub = sinon.stub(net.Socket.prototype, 'write').callsFake(function (...args) {
    const providedBuffer = args[0].toString('hex');

    if (providedBuffer.includes(targetBytes)) {
      if (providedBuffer.split(targetBytes).length !== 2) {
        throw new Error('received buffer more than one `c3a9` sequences. or perhaps none?');
      }
      // Replace the continuation byte a9 with 01, which is not a valid UTF-8
      // continuation byte, so the stored string becomes malformed.
      const buffer = Buffer.from(providedBuffer.replace(targetBytes, 'c301'), 'hex');
      const result = stub.wrappedMethod.apply(this, [buffer]);
      // The payload has been corrupted; later writes must go through untouched.
      sinon.restore();
      return result;
    }
    const result = stub.wrappedMethod.apply(this, args);
    return result;
  });

  const document = {
    field: 'é'
  };

  try {
    await collection.insertOne(document);
  } finally {
    // Always remove the stub from the shared Socket prototype, even when the insert rejects.
    sinon.restore();
  }
}

// Per-test setup: connect a fresh client, empty the target collection and seed it with
// the single malformed document.
beforeEach(async function () {
  client = this.configuration.newClient();
  await client.connect();
  collection = client.db('test').collection('invalidutf');

  await collection.deleteMany({});
  await insertDocumentWithInvalidUTF8();
});

afterEach(async function () {
sinon.restore();
await client.close();
nbbeeken marked this conversation as resolved.
Show resolved Hide resolved
});

context('when utf-8 validation is explicitly disabled', function () {
  // Builds a fresh cursor (and a fresh options object) with validation switched off.
  const openCursor = () => collection.find({}, { enableUtf8Validation: false });

  it('documents can be read using a for-await loop without errors', async function () {
    for await (const _doc of openCursor());
  });

  it('documents can be read using next() without errors', async function () {
    const cursor = openCursor();

    while (await cursor.hasNext()) {
      await cursor.next();
    }
  });

  it('documents can be read using toArray() without errors', async function () {
    await openCursor().toArray();
  });

  it('documents can be read using .stream() without errors', async function () {
    await openCursor().stream().toArray();
  });

  it('documents can be read with tryNext() without error', async function () {
    const cursor = openCursor();

    while (await cursor.hasNext()) {
      await cursor.tryNext();
    }
  });
});

/**
 * Asserts that `fn` rejects with a `BSONError` whose text reports an invalid UTF-8 string.
 *
 * The "did not reject" failure is raised outside the `try` block, so it is reported
 * directly rather than being caught and re-examined as if it were the error under test.
 *
 * @param fn - callback expected to reject
 */
async function expectReject(fn: () => Promise<void>) {
  let rejected = false;
  let error: unknown;

  try {
    await fn();
  } catch (caught) {
    rejected = true;
    error = caught;
  }

  if (!rejected) {
    expect.fail('expected the provided callback function to reject, but it did not.');
  }

  expect(error).to.match(/Invalid UTF-8 string in BSON document/);
  expect(error).to.be.instanceOf(BSONError);
}

context('when utf-8 validation is explicitly enabled', function () {
  // Builds a fresh cursor (and a fresh options object) with validation switched on.
  const openCursor = () => collection.find({}, { enableUtf8Validation: true });

  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of openCursor());
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = openCursor();

      while (await cursor.hasNext()) {
        await cursor.next();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      await openCursor().toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      await openCursor().stream().toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      const cursor = openCursor();

      while (await cursor.hasNext()) {
        await cursor.tryNext();
      }
    });
  });
});

// Every case in this context deliberately passes NO `enableUtf8Validation` option, so it is
// the driver's default behaviour that is being exercised.
context('utf-8 validation defaults to enabled', function () {
  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of collection.find({}));
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.next();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.stream().toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      // No options here: passing `enableUtf8Validation: true` would test the explicit
      // setting rather than the default.
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.tryNext();
      }
    });
  });
});
});
52 changes: 41 additions & 11 deletions test/unit/cmap/wire_protocol/responses.test.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import * as SPYABLE_BSON from 'bson';
import { expect } from 'chai';
import * as sinon from 'sinon';

Expand All @@ -16,39 +17,68 @@ describe('class MongoDBResponse', () => {
});

context('utf8 validation', () => {
afterEach(() => sinon.restore());
let deseriailzeSpy: sinon.SinonSpy;
beforeEach(function () {
// @ts-expect-error accessing internal property.
OnDemandDocument.BSON = SPYABLE_BSON;

deseriailzeSpy = sinon.spy(SPYABLE_BSON, 'deserialize');
});
afterEach(function () {
sinon.restore();
});

context('when enableUtf8Validation is not specified', () => {
const options = { enableUtf8Validation: undefined };
it('calls BSON deserialize with writeErrors validation turned off', () => {
const res = new MongoDBResponse(BSON.serialize({}));
const toObject = sinon.spy(Object.getPrototypeOf(Object.getPrototypeOf(res)), 'toObject');
res.toObject(options);
expect(toObject).to.have.been.calledWith(
sinon.match({ validation: { utf8: { writeErrors: false } } })
);

expect(deseriailzeSpy).to.have.been.called;

const [
{
args: [_buffer, { validation }]
}
] = deseriailzeSpy.getCalls();

expect(validation).to.deep.equal({ utf8: { writeErrors: false } });
});
});

context('when enableUtf8Validation is true', () => {
const options = { enableUtf8Validation: true };
it('calls BSON deserialize with writeErrors validation turned off', () => {
const res = new MongoDBResponse(BSON.serialize({}));
const toObject = sinon.spy(Object.getPrototypeOf(Object.getPrototypeOf(res)), 'toObject');
res.toObject(options);
expect(toObject).to.have.been.calledWith(
sinon.match({ validation: { utf8: { writeErrors: false } } })
);

expect(deseriailzeSpy).to.have.been.called;

const [
{
args: [_buffer, { validation }]
}
] = deseriailzeSpy.getCalls();

expect(validation).to.deep.equal({ utf8: { writeErrors: false } });
});
});

context('when enableUtf8Validation is false', () => {
const options = { enableUtf8Validation: false };
it('calls BSON deserialize with all validation disabled', () => {
const res = new MongoDBResponse(BSON.serialize({}));
const toObject = sinon.spy(Object.getPrototypeOf(Object.getPrototypeOf(res)), 'toObject');
res.toObject(options);
expect(toObject).to.have.been.calledWith(sinon.match({ validation: { utf8: false } }));

expect(deseriailzeSpy).to.have.been.called;

const [
{
args: [_buffer, { validation }]
}
] = deseriailzeSpy.getCalls();

expect(validation).to.deep.equal({ utf8: false });
});
});
});
Expand Down