Skip to content

Commit d7d3622

Browse files
authored
enable prompt caching (#10)
*Issue #, if available:* n/a
*Description of changes:* Support prompt caching for Sonnet 3.7 for token efficiency. See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent b7d6814 commit d7d3622

File tree

9 files changed

+558
-470
lines changed

9 files changed

+558
-470
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ This is an example implementation of a fully autonomous software development AI
1010
* Powered by AWS serverless services with minimal maintenance costs
1111
* No upfront or fixed costs while you don't use the system
1212
* MCP integration (tool servers)
13-
* Unlimited context window (middle-out)
13+
* Efficient token usage with prompt cache and middle-out strategy
1414
* Reads knowledge from your preferred formats (.clinerules, CLAUDE.md, etc.)
1515
* Can work on OSS forked repositories!
1616

slack-bolt-app/src/app.ts

+23-2
Original file line numberDiff line numberDiff line change
@@ -103,27 +103,48 @@ app.event('app_mention', async ({ event, client, logger }) => {
103103

104104
const tokenSummary = tokenUsage
105105
.map((item) => {
106-
const cost = calculateCost(item.SK, item.inputToken, item.outputToken);
106+
const cost = calculateCost(
107+
item.SK,
108+
item.inputToken,
109+
item.outputToken,
110+
item.cacheReadInputTokens,
111+
item.cacheWriteInputTokens
112+
);
107113
return (
108114
`Model: ${item.SK}\n` +
109115
`Input tokens: ${item.inputToken}\n` +
110116
`Output tokens: ${item.outputToken}\n` +
117+
`Cache Read tokens: ${item.cacheReadInputTokens}\n` +
118+
`Cache Write tokens: ${item.cacheWriteInputTokens}\n` +
111119
`Cost: ${cost.toFixed(4)} USD`
112120
);
113121
})
114122
.join('\n\n');
115123

116124
const totalCost = tokenUsage.reduce((acc, item) => {
117-
return acc + calculateCost(item.SK, item.inputToken, item.outputToken);
125+
return (
126+
acc +
127+
calculateCost(
128+
item.SK,
129+
item.inputToken,
130+
item.outputToken,
131+
item.cacheReadInputTokens,
132+
item.cacheWriteInputTokens
133+
)
134+
);
118135
}, 0);
119136

120137
const totalInputTokens = tokenUsage.reduce((acc, item) => acc + item.inputToken, 0);
121138
const totalOutputTokens = tokenUsage.reduce((acc, item) => acc + item.outputToken, 0);
139+
const totalCacheReadTokens = tokenUsage.reduce((acc, item) => acc + item.cacheReadInputTokens, 0);
140+
const totalCacheWriteTokens = tokenUsage.reduce((acc, item) => acc + item.cacheWriteInputTokens, 0);
122141

123142
const historyText =
124143
`=== Token Usage Summary ===\n` +
125144
`Total Input Tokens: ${totalInputTokens}\n` +
126145
`Total Output Tokens: ${totalOutputTokens}\n` +
146+
`Cache Read tokens: ${totalCacheReadTokens}\n` +
147+
`Cache Write tokens: ${totalCacheWriteTokens}\n` +
127148
`Total Cost: ${totalCost.toFixed(4)} USD\n\n` +
128149
`=== Per Model Breakdown ===\n` +
129150
`${tokenSummary}\n\n` +

slack-bolt-app/src/util/cost.ts

+18-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,23 @@
1-
const modelPricing: Record<string, { input: number; output: number }> = {
2-
'3-7-sonnet': { input: 0.003, output: 0.015 },
3-
'3-5-sonnet': { input: 0.003, output: 0.015 },
4-
'3-5-haiku': { input: 0.0008, output: 0.004 },
1+
const modelPricing = {
2+
'3-7-sonnet': { input: 0.003, output: 0.015, cacheRead: 0.0003, cacheWrite: 0.00375 },
3+
'3-5-sonnet': { input: 0.003, output: 0.015, cacheRead: 0.0003, cacheWrite: 0.00375 },
4+
'3-5-haiku': { input: 0.0008, output: 0.004, cacheRead: 0.00008, cacheWrite: 0.001 },
55
};
66

7-
export const calculateCost = (modelId: string, inputTokens: number, outputTokens: number) => {
7+
export const calculateCost = (
8+
modelId: string,
9+
inputTokens: number,
10+
outputTokens: number,
11+
cacheReadTokens: number,
12+
cacheWriteTokens: number
13+
) => {
814
const pricing = Object.entries(modelPricing).find(([key]) => modelId.includes(key))?.[1];
915
if (pricing == null) return 0;
10-
return (inputTokens * pricing.input + outputTokens * pricing.output) / 1000;
16+
return (
17+
(inputTokens * pricing.input +
18+
outputTokens * pricing.output +
19+
cacheReadTokens * pricing.cacheRead +
20+
cacheWriteTokens * pricing.cacheWrite) /
21+
1000
22+
);
1123
};

0 commit comments

Comments
 (0)