Skip to content

fix: Use correct limit when retrying a limit query stream with a cursor #2203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions dev/src/reference/query-util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ export class QueryUtil<
const startTime = Date.now();
const isExplain = explainOptions !== undefined;

let numDocumentsReceived = 0;
let lastReceivedDocument: QueryDocumentSnapshot<
AppModelType,
DbModelType
Expand Down Expand Up @@ -239,6 +240,7 @@ export class QueryUtil<
);
}

++numDocumentsReceived;
callback(undefined, output);

if (proto.done) {
Expand Down Expand Up @@ -317,6 +319,12 @@ export class QueryUtil<
stream.destroy(err);
streamActive.resolve(/* active= */ false);
} else if (lastReceivedDocument && retryWithCursor) {
if (query instanceof VectorQuery) {
throw new Error(
'Unimplemented: Vector query does not support cursors yet.'
);
}

logger(
'Query._stream',
tag,
Expand All @@ -330,12 +338,28 @@ export class QueryUtil<
// the query cursor. Note that we do not use backoff here. The
// call to `requestStream()` will backoff should the restart
// fail before delivering any results.
let newQuery: Query<AppModelType, DbModelType> = query;
if (this._queryOptions.limit) {
const newLimit =
this._queryOptions.limit - numDocumentsReceived;
if (
this._queryOptions.limitType === undefined ||
this._queryOptions.limitType === LimitType.First
) {
newQuery = query.limit(newLimit);
} else {
newQuery = query.limitToLast(newLimit);
}
}

if (this._queryOptions.requireConsistency) {
request = query
request = newQuery
.startAfter(lastReceivedDocument)
.toProto(lastReceivedDocument.readTime);
} else {
request = query.startAfter(lastReceivedDocument).toProto();
request = newQuery
.startAfter(lastReceivedDocument)
.toProto();
}

// Set lastReceivedDocument to null before each retry attempt to ensure the retry makes progress
Expand Down
260 changes: 210 additions & 50 deletions dev/test/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {describe, it, beforeEach, afterEach} from 'mocha';
import {expect, use} from 'chai';
import * as chaiAsPromised from 'chai-as-promised';
import * as extend from 'extend';
import * as assert from 'assert';

import {firestore, google} from '../protos/firestore_v1_proto_api';
import {
Expand Down Expand Up @@ -3564,6 +3565,7 @@ describe('collectionGroup queries', () => {

describe('query resumption', () => {
let firestore: Firestore;
const RETRYABLE_ERROR_DOMAIN = 'RETRYABLE_ERROR_DOMAIN';

beforeEach(() => {
setTimeoutHandler(setImmediate);
Expand All @@ -3577,70 +3579,130 @@ describe('query resumption', () => {
setTimeoutHandler(setTimeout);
});

// Prevent regression of
// https://github.com/googleapis/nodejs-firestore/issues/1790
it('results should not be double produced on retryable error with back pressure', async () => {
// Generate the IDs of the documents that will match the query.
const documentIds = Array.from(new Array(500), (_, index) => `doc${index}`);

// Finds the index in `documentIds` of the document referred to in the
// "startAt" of the given request.
function getStartAtDocumentIndex(
request: api.IRunQueryRequest
): number | null {
const startAt = request.structuredQuery?.startAt;
const startAtValue = startAt?.values?.[0]?.referenceValue;
const startAtBefore = startAt?.before;
if (typeof startAtValue !== 'string') {
return null;
}
const docId = startAtValue.split('/').pop()!;
const docIdIndex = documentIds.indexOf(docId);
if (docIdIndex < 0) {
return null;
}
return startAtBefore ? docIdIndex : docIdIndex + 1;
/**
 * Yields `numDocs` document responses, followed by one simulated retryable
 * error.
 *
 * @param documentIds IDs of the documents to build responses from.
 * @param numDocs Number of document responses to yield before the error.
 *   Must not exceed `documentIds.length`.
 * @param startAtEnd When `true`, the last `numDocs` IDs are used and yielded
 *   in reverse order; otherwise the first `numDocs` IDs are yielded in order.
 * @returns A generator of `numDocs` responses and then a `GoogleError` whose
 *   `domain` is `RETRYABLE_ERROR_DOMAIN`.
 */
function* getDocResponsesFollowedByError(
  documentIds: string[],
  numDocs: number,
  startAtEnd: boolean
): Generator<api.IRunQueryResponse | Error> {
  assert(numDocs <= documentIds.length);
  // Compute the tail offset explicitly. `slice(-numDocs)` would return the
  // entire array when `numDocs` is 0, because `-0` is treated as index 0.
  const sliced = startAtEnd
    ? documentIds.slice(documentIds.length - numDocs)
    : documentIds.slice(0, numDocs);
  const runQueryResponses = sliced.map(documentId => result(documentId));
  if (startAtEnd) {
    // Documents taken from the end are delivered last-to-first.
    runQueryResponses.reverse();
  }
  yield* runQueryResponses;

  // Terminate the stream with an error tagged with the retryable domain.
  const retryableError = new GoogleError('simulated retryable error');
  retryableError.domain = RETRYABLE_ERROR_DOMAIN;
  yield retryableError;
}

const RETRYABLE_ERROR_DOMAIN = 'RETRYABLE_ERROR_DOMAIN';
// Returns the documents from the given `documentIds` that match the `startAt`
// (or `endAt`) and the `limit` (if any) of the given request.
function* getDocResponsesForRequest(
request: api.IRunQueryRequest,
documentIds: string[]
): Generator<api.IRunQueryResponse> {
if (request.structuredQuery?.startAt) {
const startAtDocumentIndex = getStartAtDocumentIndex(
request,
documentIds
);
if (startAtDocumentIndex === null) {
throw new Error('the request should specify a valid startAt');
}

// A mock replacement for Query._isPermanentRpcError which (a) resolves
// a promise once invoked and (b) treats only a specific error "domain"
// as retryable; every other error is considered permanent.
function mockIsPermanentRpcError(err: GoogleError): boolean {
mockIsPermanentRpcError.invoked.resolve(true);
return err?.domain !== RETRYABLE_ERROR_DOMAIN;
}
mockIsPermanentRpcError.invoked = new Deferred();
let end: number | undefined = undefined;
if (request.structuredQuery?.limit?.value) {
end = startAtDocumentIndex + request.structuredQuery?.limit!.value;
}

// Return the first half of the documents, followed by a retryable error.
function* getRequest1Responses(): Generator<api.IRunQueryResponse | Error> {
const runQueryResponses = documentIds
.slice(0, documentIds.length / 2)
.slice(startAtDocumentIndex, end)
.map(documentId => result(documentId));
for (const runQueryResponse of runQueryResponses) {
yield runQueryResponse;
}
const retryableError = new GoogleError('simulated retryable error');
retryableError.domain = RETRYABLE_ERROR_DOMAIN;
yield retryableError;
}
} else if (request.structuredQuery?.endAt) {
const endAtDocumentIndex = getEndAtDocumentIndex(request, documentIds);
if (endAtDocumentIndex === null) {
throw new Error('the request should specify a valid endAt');
}

// Return the remaining documents.
function* getRequest2Responses(
request: api.IRunQueryRequest
): Generator<api.IRunQueryResponse> {
const startAtDocumentIndex = getStartAtDocumentIndex(request);
if (startAtDocumentIndex === null) {
throw new Error('request #2 should specify a valid startAt');
let begin: number | undefined = undefined;
if (request.structuredQuery?.limit?.value) {
begin = endAtDocumentIndex - request.structuredQuery?.limit!.value;
}

const runQueryResponses = documentIds
.slice(startAtDocumentIndex)
.slice(begin, endAtDocumentIndex)
.map(documentId => result(documentId));
for (const runQueryResponse of runQueryResponses) {
for (const runQueryResponse of runQueryResponses.reverse()) {
yield runQueryResponse;
}
} else {
throw new Error('the request does not specify a valid startAt or endAt');
}
}

// Finds the index in `documentIds` of the document referred to in the
// "startAt" of the given request. Returns `null` if it cannot find one.
function getStartAtDocumentIndex(
request: api.IRunQueryRequest,
documentIds: string[]
): number | null {
const startAt = request.structuredQuery?.startAt;
const startAtValue = startAt?.values?.[0]?.referenceValue;
const startAtBefore = startAt?.before;
if (typeof startAtValue !== 'string') {
return null;
}
const docId = startAtValue.split('/').pop()!;
const docIdIndex = documentIds.indexOf(docId);
if (docIdIndex < 0) {
return null;
}
return startAtBefore ? docIdIndex : docIdIndex + 1;
}

/**
 * Maps the "endAt" cursor of a request to a position in `documentIds`.
 *
 * The document ID is taken from the last `/`-separated segment of the
 * cursor's first reference value.
 *
 * @param request The RunQuery request whose `structuredQuery.endAt` is read.
 * @param documentIds The document IDs to search.
 * @returns The document's index when the cursor has `before` set, the index
 *   minus one otherwise, or `null` when the cursor has no string reference
 *   value or the document ID is not in `documentIds`.
 */
function getEndAtDocumentIndex(
  request: api.IRunQueryRequest,
  documentIds: string[]
): number | null {
  const cursor = request.structuredQuery?.endAt;
  const reference = cursor?.values?.[0]?.referenceValue;
  if (typeof reference !== 'string') {
    return null;
  }

  const segments = reference.split('/');
  const position = documentIds.indexOf(segments[segments.length - 1]);
  if (position < 0) {
    return null;
  }

  if (cursor?.before) {
    return position;
  }
  // NOTE(review): when `before` is unset this yields the index preceding the
  // document (possibly -1). Confirm this is the intended bound for callers
  // that use the result as an exclusive slice end.
  return position - 1;
}

// Prevent regression of
// https://github.com/googleapis/nodejs-firestore/issues/1790
it('results should not be double produced on retryable error with back pressure', async () => {
// Generate the IDs of the documents that will match the query.
const documentIds = Array.from(new Array(500), (_, index) => `doc${index}`);

// A mock replacement for Query._isPermanentRpcError which (a) resolves
// a promise once invoked and (b) treats only a specific error "domain"
// as retryable; every other error is considered permanent.
function mockIsPermanentRpcError(err: GoogleError): boolean {
mockIsPermanentRpcError.invoked.resolve(true);
return err?.domain !== RETRYABLE_ERROR_DOMAIN;
}
mockIsPermanentRpcError.invoked = new Deferred();

// Set up the mocked responses for the RunQuery RPC.
let requestNum = 0;
Expand All @@ -3649,9 +3711,17 @@ describe('query resumption', () => {
requestNum++;
switch (requestNum) {
case 1:
return stream(...getRequest1Responses());
// Return the first half of the documents, followed by a retryable error.
return stream(
...getDocResponsesFollowedByError(
documentIds,
250,
/*startAtEnd*/ false
)
);
case 2:
return stream(...getRequest2Responses(request!));
// Return the remaining documents.
return stream(...getDocResponsesForRequest(request!, documentIds));
default:
throw new Error(`should never get here (requestNum=${requestNum})`);
}
Expand All @@ -3677,4 +3747,94 @@ describe('query resumption', () => {
const actualDocumentIds = snapshots.map(snapshot => snapshot.id);
expect(actualDocumentIds).to.eql(documentIds);
});

it('resuming queries with a cursor should respect the original query limit', async () => {
  // The IDs of every document that matches the query, and the query's limit.
  const documentIds = Array.from(new Array(500), (_, index) => `doc${index}`);
  const limit = 300;

  // Mock the RunQuery RPC: the first call delivers 250 documents and then
  // fails with a retryable error; the second call serves whatever the
  // resumed request asks for.
  let requestNum = 0;
  const overrides: ApiOverride = {
    runQuery: request => {
      requestNum++;
      if (requestNum === 1) {
        return stream(
          ...getDocResponsesFollowedByError(
            documentIds,
            250,
            /*startAtEnd*/ false
          )
        );
      }
      if (requestNum === 2) {
        return stream(...getDocResponsesForRequest(request!, documentIds));
      }
      throw new Error(`should never get here (requestNum=${requestNum})`);
    },
  };

  // Run the limited query through an async iterator over its stream.
  firestore = await createInstance(overrides);
  const query = firestore.collection('collectionId').limit(limit);
  // Report every RPC error as non-permanent.
  query._queryUtil._isPermanentRpcError = () => false;
  const iterator = query
    .stream()
    [Symbol.asyncIterator]() as AsyncIterator<QueryDocumentSnapshot>;
  const snapshots = await collect(iterator);

  // The result must contain exactly `limit` documents: the first `limit`
  // IDs, in order.
  const actualDocumentIds = snapshots.map(snapshot => snapshot.id);
  expect(actualDocumentIds.length).to.eql(limit);
  expect(actualDocumentIds).to.eql(documentIds.slice(0, limit));
});

it('resuming queries with a cursor should respect the original query limitToLast', async () => {
  // The IDs of every document that matches the query, and the query's limit.
  const documentIds = Array.from(new Array(500), (_, index) => `doc${index}`);
  const limit = 300;

  // Mock the RunQuery RPC: the first call delivers the last 250 documents
  // (in reverse order) and then fails with a retryable error; the second
  // call serves whatever the resumed request asks for.
  let requestNum = 0;
  const overrides: ApiOverride = {
    runQuery: request => {
      requestNum++;
      if (requestNum === 1) {
        return stream(
          ...getDocResponsesFollowedByError(
            documentIds,
            250,
            /*startAtEnd*/ true
          )
        );
      }
      if (requestNum === 2) {
        return stream(...getDocResponsesForRequest(request!, documentIds));
      }
      throw new Error(`should never get here (requestNum=${requestNum})`);
    },
  };

  // `stream()` cannot be called for `limitToLast` queries. We can, however,
  // test using the `.get()` method which does some additional processing.
  firestore = await createInstance(overrides);
  const query = firestore
    .collection('collectionId')
    .orderBy(FieldPath.documentId())
    .limitToLast(limit);
  // Report every RPC error as non-permanent.
  query._queryUtil._isPermanentRpcError = () => false;
  const snapshots = await query.get();

  // The result must contain exactly `limit` documents: the last `limit`
  // IDs (`slice(-limit)`), in order.
  const actualDocumentIds = snapshots.docs.map(snapshot => snapshot.id);
  expect(actualDocumentIds.length).to.eql(limit);
  expect(actualDocumentIds).to.eql(documentIds.slice(-limit));
});
});
2 changes: 1 addition & 1 deletion dev/test/recursive-delete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ describe('recursiveDelete() method:', () => {
'LESS_THAN',
endAt('root')
),
limit(RECURSIVE_DELETE_MAX_PENDING_OPS)
limit(RECURSIVE_DELETE_MAX_PENDING_OPS - 1)
);
return stream();
}
Expand Down
Loading