Skip to content

Commit d6964a4

Browse files
feat: improve batching summary errors (#2509)
To avoid data loss, the Batching API throws an exception when the batcher is closed if at least one entry failed. To help with debugging, the BatchingException tries to be helpful by giving some details about the errors. Since the Batcher lifetime can extend indefinitely, the Batcher implementation tries to keep a bound on the amount of resources it uses to track the errors. Previously, it would only track exception types and counts, the idea being that if the customer needs fine-grained details, the customer can retrieve them from the ApiFuture that was returned when an entry was added. However, this hasn't panned out and creates confusion. This PR stores a sample of the error messages. By default, the sample is capped at 50 entry error messages and 50 RPC error messages. This can be adjusted by setting a system property (`com.google.api.gax.batching.errors.max-samples`). Thank you for opening a Pull Request! For general contributing guidelines, please refer to the [contributing guide](https://github.com/googleapis/gapic-generator-java/blob/main/CONTRIBUTING.md). Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/gapic-generator-java/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> ☕️
1 parent aab08b4 commit d6964a4

File tree

2 files changed

+51
-3
lines changed

2 files changed

+51
-3
lines changed

gax-java/gax/src/main/java/com/google/api/gax/batching/BatcherStats.java

+32
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@
3131

3232
import com.google.api.gax.rpc.ApiException;
3333
import com.google.api.gax.rpc.StatusCode.Code;
34+
import com.google.common.base.Joiner;
3435
import com.google.common.base.MoreObjects;
36+
import com.google.common.collect.EvictingQueue;
3537
import java.util.HashMap;
3638
import java.util.Iterator;
3739
import java.util.List;
@@ -52,6 +54,21 @@ class BatcherStats {
5254
private final Map<Class, Integer> entryExceptionCounts = new HashMap<>();
5355
private final Map<Code, Integer> entryStatusCounts = new HashMap<>();
5456

57+
/**
58+
* The maximum number of error messages that a Batcher instance will retain. By default, a Batcher
59+
* instance will retain 50 entry error messages and 50 RPC error messages. This limit can be
60+
* temporarily increased by setting the {@code com.google.api.gax.batching.errors.max-samples}
61+
* system property. This should only be needed in very rare situations and should not be
62+
* considered part of the public API.
63+
*/
64+
private final int MAX_ERROR_MSG_SAMPLES =
65+
Integer.getInteger("com.google.api.gax.batching.errors.max-samples", 50);
66+
67+
private final EvictingQueue<String> sampleOfRpcErrors =
68+
EvictingQueue.create(MAX_ERROR_MSG_SAMPLES);
69+
private final EvictingQueue<String> sampleOfEntryErrors =
70+
EvictingQueue.create(MAX_ERROR_MSG_SAMPLES);
71+
5572
/**
5673
* Records the count of the exception and its type when a complete batch failed to apply.
5774
*
@@ -69,6 +86,8 @@ synchronized void recordBatchFailure(Throwable throwable) {
6986
requestStatusCounts.put(code, oldStatusCount + 1);
7087
}
7188

89+
sampleOfRpcErrors.add(throwable.toString());
90+
7291
int oldExceptionCount = MoreObjects.firstNonNull(requestExceptionCounts.get(exceptionClass), 0);
7392
requestExceptionCounts.put(exceptionClass, oldExceptionCount + 1);
7493
}
@@ -96,6 +115,8 @@ synchronized <T extends BatchEntry> void recordBatchElementsCompletion(
96115
Throwable actualCause = throwable.getCause();
97116
Class exceptionClass = actualCause.getClass();
98117

118+
sampleOfEntryErrors.add(actualCause.toString());
119+
99120
if (actualCause instanceof ApiException) {
100121
Code code = ((ApiException) actualCause).getStatusCode().getCode();
101122
exceptionClass = ApiException.class;
@@ -144,6 +165,17 @@ synchronized BatchingException asException() {
144165
.append(buildExceptionList(entryExceptionCounts, entryStatusCounts))
145166
.append(".");
146167
}
168+
169+
if (!sampleOfRpcErrors.isEmpty()) {
170+
messageBuilder.append(" Sample of RPC errors: ");
171+
messageBuilder.append(Joiner.on(", ").join(sampleOfRpcErrors));
172+
messageBuilder.append(".");
173+
}
174+
if (!sampleOfEntryErrors.isEmpty()) {
175+
messageBuilder.append(" Sample of entry errors: ");
176+
messageBuilder.append(Joiner.on(", ").join(sampleOfEntryErrors));
177+
messageBuilder.append(".");
178+
}
147179
return new BatchingException(messageBuilder.toString());
148180
}
149181

gax-java/gax/src/test/java/com/google/api/gax/batching/BatcherStatsTest.java

+19-3
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,10 @@ public void testRequestFailuresOnly() {
5555

5656
batcherStats.recordBatchFailure(
5757
ApiExceptionFactory.createException(
58-
new RuntimeException(), FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT), false));
58+
"fake api error",
59+
new RuntimeException(),
60+
FakeStatusCode.of(StatusCode.Code.INVALID_ARGUMENT),
61+
false));
5962

6063
batcherStats.recordBatchFailure(new RuntimeException("Request failed"));
6164

@@ -65,6 +68,10 @@ public void testRequestFailuresOnly() {
6568
assertThat(exception).hasMessageThat().contains("1 RuntimeException");
6669
assertThat(exception).hasMessageThat().contains("1 ApiException(1 INVALID_ARGUMENT)");
6770
assertThat(exception).hasMessageThat().contains("and 0 partial failures.");
71+
assertThat(exception)
72+
.hasMessageThat()
73+
.contains(
74+
"com.google.api.gax.rpc.InvalidArgumentException: fake api error, java.lang.RuntimeException: Request failed.");
6875
}
6976

7077
@Test
@@ -79,7 +86,10 @@ public void testEntryFailureOnly() {
7986
SettableApiFuture<Integer> batchTwoResult = SettableApiFuture.create();
8087
batchTwoResult.setException(
8188
ApiExceptionFactory.createException(
82-
new RuntimeException(), FakeStatusCode.of(StatusCode.Code.UNAVAILABLE), false));
89+
"fake entry error",
90+
new RuntimeException(),
91+
FakeStatusCode.of(StatusCode.Code.UNAVAILABLE),
92+
false));
8393
batcherStats.recordBatchElementsCompletion(
8494
ImmutableList.of(BatchEntry.create(2, batchTwoResult)));
8595

@@ -89,6 +99,10 @@ public void testEntryFailureOnly() {
8999
.contains("The 2 partial failures contained 2 entries that failed with:");
90100
assertThat(ex).hasMessageThat().contains("1 ApiException(1 UNAVAILABLE)");
91101
assertThat(ex).hasMessageThat().contains("1 IllegalStateException");
102+
assertThat(ex)
103+
.hasMessageThat()
104+
.contains(
105+
"Sample of entry errors: java.lang.IllegalStateException: local element failure, com.google.api.gax.rpc.UnavailableException: fake entry error.");
92106
}
93107

94108
@Test
@@ -110,6 +124,8 @@ public void testRequestAndEntryFailures() {
110124
.contains(
111125
"Batching finished with 1 batches failed to apply due to: 1 RuntimeException and 1 "
112126
+ "partial failures. The 1 partial failures contained 1 entries that failed with:"
113-
+ " 1 ApiException(1 ALREADY_EXISTS).");
127+
+ " 1 ApiException(1 ALREADY_EXISTS)."
128+
+ " Sample of RPC errors: java.lang.RuntimeException: Batch failure."
129+
+ " Sample of entry errors: com.google.api.gax.rpc.AlreadyExistsException: java.lang.RuntimeException.");
114130
}
115131
}

0 commit comments

Comments
 (0)