Skip to content

Commit 8bfe187

Browse files
fix(NODE-6355): respect utf8 validation options when iterating cursors (#4214)
1 parent fb13ebf commit 8bfe187

File tree

4 files changed

+261
-52
lines changed

4 files changed

+261
-52
lines changed

src/cmap/wire_protocol/on_demand/document.ts

+17-4
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,16 @@
11
import {
22
Binary,
3-
BSON,
43
type BSONElement,
54
BSONError,
65
type BSONSerializeOptions,
76
BSONType,
7+
deserialize,
88
getBigInt64LE,
99
getFloat64LE,
1010
getInt32LE,
1111
ObjectId,
1212
parseToElementsToArray,
13+
pluckBSONSerializeOptions,
1314
Timestamp,
1415
toUTF8
1516
} from '../../../bson';
@@ -330,11 +331,23 @@ export class OnDemandDocument {
330331
* @param options - BSON deserialization options
331332
*/
332333
public toObject(options?: BSONSerializeOptions): Record<string, any> {
333-
return BSON.deserialize(this.bson, {
334-
...options,
334+
const exactBSONOptions = {
335+
...pluckBSONSerializeOptions(options ?? {}),
336+
validation: this.parseBsonSerializationOptions(options),
335337
index: this.offset,
336338
allowObjectSmallerThanBufferSize: true
337-
});
339+
};
340+
return deserialize(this.bson, exactBSONOptions);
341+
}
342+
343+
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
344+
utf8: { writeErrors: false } | false;
345+
} {
346+
const enableUtf8Validation = options?.enableUtf8Validation;
347+
if (enableUtf8Validation === false) {
348+
return { utf8: false };
349+
}
350+
return { utf8: { writeErrors: false } };
338351
}
339352

340353
/** Returns this document's bytes only */

src/cmap/wire_protocol/responses.ts

-19
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ import {
55
type Document,
66
Long,
77
parseToElementsToArray,
8-
pluckBSONSerializeOptions,
98
type Timestamp
109
} from '../../bson';
1110
import { MongoUnexpectedServerResponseError } from '../../error';
@@ -166,24 +165,6 @@ export class MongoDBResponse extends OnDemandDocument {
166165
}
167166
return this.clusterTime ?? null;
168167
}
169-
170-
public override toObject(options?: BSONSerializeOptions): Record<string, any> {
171-
const exactBSONOptions = {
172-
...pluckBSONSerializeOptions(options ?? {}),
173-
validation: this.parseBsonSerializationOptions(options)
174-
};
175-
return super.toObject(exactBSONOptions);
176-
}
177-
178-
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
179-
utf8: { writeErrors: false } | false;
180-
} {
181-
const enableUtf8Validation = options?.enableUtf8Validation;
182-
if (enableUtf8Validation === false) {
183-
return { utf8: false };
184-
}
185-
return { utf8: { writeErrors: false } };
186-
}
187168
}
188169

189170
/** @internal */

test/integration/node-specific/bson-options/utf8_validation.test.ts

+185-3
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,16 @@
11
import { expect } from 'chai';
2+
import * as net from 'net';
23
import * as sinon from 'sinon';
4+
import { inspect } from 'util';
35

46
import {
57
BSON,
8+
BSONError,
9+
type Collection,
10+
deserialize,
611
type MongoClient,
7-
MongoDBResponse,
812
MongoServerError,
13+
OnDemandDocument,
914
OpMsgResponse
1015
} from '../../../mongodb';
1116

@@ -23,12 +28,12 @@ describe('class MongoDBResponse', () => {
2328
let bsonSpy: sinon.SinonSpy;
2429

2530
beforeEach(() => {
26-
bsonSpy = sinon.spy(MongoDBResponse.prototype, 'parseBsonSerializationOptions');
31+
// @ts-expect-error private function
32+
bsonSpy = sinon.spy(OnDemandDocument.prototype, 'parseBsonSerializationOptions');
2733
});
2834

2935
afterEach(() => {
3036
bsonSpy?.restore();
31-
// @ts-expect-error: Allow this to be garbage collected
3237
bsonSpy = null;
3338
});
3439

@@ -153,3 +158,180 @@ describe('class MongoDBResponse', () => {
153158
}
154159
);
155160
});
161+
162+
describe('utf8 validation with cursors', function () {
163+
let client: MongoClient;
164+
let collection: Collection;
165+
166+
/**
167+
* Inserts a document with malformed utf8 bytes. This method stubs net.Socket.prototype.write, waits
168+
* for the OP_MSG payload corresponding to `collection.insertOne({ field: 'é'.repeat(8) })`, and then
169+
* replaces the bytes of each 'é' (c3 a9) with c3 01, to produce invalid utf8, before forwarding the write.
170+
*/
171+
async function insertDocumentWithInvalidUTF8() {
172+
const stub = sinon.stub(net.Socket.prototype, 'write').callsFake(function (...args) {
173+
const providedBuffer = args[0].toString('hex');
174+
const targetBytes = Buffer.from(document.field, 'utf-8').toString('hex');
175+
176+
if (providedBuffer.includes(targetBytes)) {
177+
if (providedBuffer.split(targetBytes).length !== 2) {
178+
sinon.restore();
179+
const message = `too many target bytes sequences: received ${providedBuffer.split(targetBytes).length}\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`;
180+
throw new Error(message);
181+
}
182+
const buffer = Buffer.from(providedBuffer.replace(targetBytes, 'c301'.repeat(8)), 'hex');
183+
const result = stub.wrappedMethod.apply(this, [buffer]);
184+
sinon.restore();
185+
return result;
186+
}
187+
const result = stub.wrappedMethod.apply(this, args);
188+
return result;
189+
});
190+
191+
const document = {
192+
field: 'é'.repeat(8)
193+
};
194+
195+
await collection.insertOne(document);
196+
197+
sinon.restore();
198+
}
199+
200+
beforeEach(async function () {
201+
client = this.configuration.newClient();
202+
await client.connect();
203+
const db = client.db('test');
204+
collection = db.collection('invalidutf');
205+
206+
await collection.deleteMany({});
207+
await insertDocumentWithInvalidUTF8();
208+
});
209+
210+
afterEach(async function () {
211+
sinon.restore();
212+
await client.close();
213+
});
214+
215+
context('when utf-8 validation is explicitly disabled', function () {
216+
it('documents can be read using a for-await loop without errors', async function () {
217+
for await (const _doc of collection.find({}, { enableUtf8Validation: false }));
218+
});
219+
it('documents can be read using next() without errors', async function () {
220+
const cursor = collection.find({}, { enableUtf8Validation: false });
221+
222+
while (await cursor.hasNext()) {
223+
await cursor.next();
224+
}
225+
});
226+
227+
it('documents can be read using toArray() without errors', async function () {
228+
const cursor = collection.find({}, { enableUtf8Validation: false });
229+
await cursor.toArray();
230+
});
231+
232+
it('documents can be read using .stream() without errors', async function () {
233+
const cursor = collection.find({}, { enableUtf8Validation: false });
234+
await cursor.stream().toArray();
235+
});
236+
237+
it('documents can be read with tryNext() without error', async function () {
238+
const cursor = collection.find({}, { enableUtf8Validation: false });
239+
240+
while (await cursor.hasNext()) {
241+
await cursor.tryNext();
242+
}
243+
});
244+
});
245+
246+
async function expectReject(fn: () => Promise<void>) {
247+
try {
248+
await fn();
249+
expect.fail('expected the provided callback function to reject, but it did not.');
250+
} catch (error) {
251+
expect(error).to.match(/Invalid UTF-8 string in BSON document/);
252+
expect(error).to.be.instanceOf(BSONError);
253+
}
254+
}
255+
256+
context('when utf-8 validation is explicitly enabled', function () {
257+
it('a for-await loop throw a BSON error', async function () {
258+
await expectReject(async () => {
259+
for await (const _doc of collection.find({}, { enableUtf8Validation: true }));
260+
});
261+
});
262+
it('next() throws a BSON error', async function () {
263+
await expectReject(async () => {
264+
const cursor = collection.find({}, { enableUtf8Validation: true });
265+
266+
while (await cursor.hasNext()) {
267+
await cursor.next();
268+
}
269+
});
270+
});
271+
272+
it('toArray() throws a BSON error', async function () {
273+
await expectReject(async () => {
274+
const cursor = collection.find({}, { enableUtf8Validation: true });
275+
await cursor.toArray();
276+
});
277+
});
278+
279+
it('.stream() throws a BSONError', async function () {
280+
await expectReject(async () => {
281+
const cursor = collection.find({}, { enableUtf8Validation: true });
282+
await cursor.stream().toArray();
283+
});
284+
});
285+
286+
it('tryNext() throws a BSONError', async function () {
287+
await expectReject(async () => {
288+
const cursor = collection.find({}, { enableUtf8Validation: true });
289+
290+
while (await cursor.hasNext()) {
291+
await cursor.tryNext();
292+
}
293+
});
294+
});
295+
});
296+
297+
context('utf-8 validation defaults to enabled', function () {
298+
it('a for-await loop throw a BSON error', async function () {
299+
await expectReject(async () => {
300+
for await (const _doc of collection.find({}));
301+
});
302+
});
303+
it('next() throws a BSON error', async function () {
304+
await expectReject(async () => {
305+
const cursor = collection.find({});
306+
307+
while (await cursor.hasNext()) {
308+
await cursor.next();
309+
}
310+
});
311+
});
312+
313+
it('toArray() throws a BSON error', async function () {
314+
await expectReject(async () => {
315+
const cursor = collection.find({});
316+
await cursor.toArray();
317+
});
318+
});
319+
320+
it('.stream() throws a BSONError', async function () {
321+
await expectReject(async () => {
322+
const cursor = collection.find({});
323+
await cursor.stream().toArray();
324+
});
325+
});
326+
327+
it('tryNext() throws a BSONError', async function () {
328+
await expectReject(async () => {
329+
// No options are passed on purpose: this context checks the default (validation enabled), like its sibling tests.
const cursor = collection.find({});
330+

331+
while (await cursor.hasNext()) {
332+
await cursor.tryNext();
333+
}
334+
});
335+
});
});
337+
});

0 commit comments

Comments
 (0)