Skip to content

Commit c6e814d

Browse files
committed
fix: add fallback moderation in case openai goes down
1 parent 97a1616 commit c6e814d

File tree

2 files changed

+111
-7
lines changed

2 files changed

+111
-7
lines changed

src/backend/src/modules/puterai/AIChatService.js

+34-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ const { DB_WRITE } = require("../../services/database/consts");
66
const { TypeSpec } = require("../../services/drivers/meta/Construct");
77
const { TypedValue } = require("../../services/drivers/meta/Runtime");
88
const { Context } = require("../../util/context");
9+
const { AsModeration } = require("./lib/AsModeration");
910

1011
// Maximum number of fallback attempts when a model fails, including the first attempt
1112
const MAX_FALLBACKS = 3 + 1; // includes first attempt
@@ -489,11 +490,6 @@ class AIChatService extends BaseService {
489490
* Returns true if all messages pass moderation, false if any message is flagged; throws an Error if no moderation service is working.
490491
*/
491492
async moderate ({ messages }) {
492-
const svc_openai = this.services.get('openai-completion');
493-
494-
// We can't use moderation of openai service isn't available
495-
if ( ! svc_openai ) return true;
496-
497493
for ( const msg of messages ) {
498494
const texts = [];
499495
if ( typeof msg.content === 'string' ) texts.push(msg.content);
@@ -508,8 +504,39 @@ class AIChatService extends BaseService {
508504

509505
const fulltext = texts.join('\n');
510506

511-
const mod_result = await svc_openai.check_moderation(fulltext);
512-
if ( mod_result.flagged ) return false;
507+
let mod_last_error = null;
508+
let mod_result = null;
509+
try {
510+
const svc_openai = this.services.get('openai-completion');
511+
mod_result = await svc_openai.check_moderation(fulltext);
512+
if ( mod_result.flagged ) return false;
513+
} catch (e) {
514+
console.error(e);
515+
mod_last_error = e;
516+
}
517+
try {
518+
const svc_claude = this.services.get('claude');
519+
const chat = svc_claude.as('puter-chat-completion');
520+
const mod = new AsModeration({
521+
chat,
522+
model: 'claude-3-haiku-20240307',
523+
})
524+
if ( ! await mod.moderate(fulltext) ) {
525+
return false;
526+
}
527+
mod_last_error = null;
528+
} catch (e) {
529+
console.error(e);
530+
mod_last_error = e;
531+
}
532+
533+
if ( mod_last_error ) {
534+
this.log.error('moderation error', {
535+
fulltext,
536+
mod_last_error,
537+
});
538+
throw new Error('no working moderation service');
539+
}
513540
}
514541
return true;
515542
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
const { default: dedent } = require("dedent");
2+
3+
/**
 * Content-moderation adapter: asks a chat-completion object to classify a
 * message against a fixed list of unsafe categories and reduces the reply
 * to a single allow/deny boolean.
 */
class AsModeration {
    /**
     * @param {object} options
     * @param {object} options.chat - object exposing an async
     *   `complete({ messages })` method
     * @param {string} [options.model] - model identifier, stored on the
     *   instance
     */
    constructor ({ chat, model }) {
        this.chat = chat;
        // NOTE(review): `model` is stored but never forwarded to
        // `chat.complete()` — confirm whether the completion call is
        // expected to receive it.
        this.model = model;
    }

    /**
     * Ask the chat model whether `text` violates any unsafe category.
     *
     * @param {string} text - message to assess
     * @returns {Promise<boolean>} `true` when the message is acceptable,
     *   `false` when the model reports a violation or when no reply text
     *   could be found in the result (fail closed)
     * @throws {SyntaxError} when the reply does not contain parseable JSON
     */
    async moderate (text) {
        // copied from Anthropic documentation
        // https://docs.anthropic.com/en/docs/about-claude/use-case-guides/content-moderation
        const unsafe_categories = [
            'Child Exploitation',
            'Conspiracy Theories',
            'Hate',
            'Indiscriminate Weapons',
            'Intellectual Property',
            'Non-Violent Crimes',
            'Privacy',
            'Self-Harm',
            'Sex Crimes',
            'Sexual Content',
            'Specialized Advice',
            'Violent Crimes',
        ];

        const unsafe_category_str = unsafe_categories.join('\n');

        // Escape EVERY angle bracket. `String.prototype.replace` with a
        // string pattern only touches the first occurrence, which would
        // let a message smuggle in its own `</message>` tag and break out
        // of the delimited section of the prompt.
        const escaped_text = text
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');

        // The JSON example uses single braces: the doubled braces in the
        // upstream example are Python f-string escapes and would appear
        // literally in a JavaScript template literal.
        const assessment_prompt = dedent(`
            Determine whether the following message warrants moderation,
            based on the unsafe categories outlined below.

            Message:
            <message>
            ${escaped_text}
            </message>

            Unsafe Categories:
            <categories>
            ${unsafe_category_str}
            </categories>

            Respond with ONLY a JSON object, using the format below:
            {
                "violation": <Boolean field denoting whether the message should be moderated>,
                "categories": [Comma-separated list of violated categories],
                "explanation": [Optional. Only include if there is a violation.]
            }
        `);

        const result = await this.chat.complete({
            messages: [
                {
                    role: 'user',
                    content: assessment_prompt,
                },
            ],
        });

        // Two result shapes are probed; when neither yields text the
        // default below treats the message as a violation.
        const reply = String(
            result.message?.content?.[0]?.text ??
            result.messages?.[0]?.content?.[0]?.text ??
            '{ "violation": true }'
        );

        // Tolerate prose around the JSON object by parsing only the span
        // from the first `{` to the last `}` when one exists.
        const json_start = reply.indexOf('{');
        const json_end = reply.lastIndexOf('}');
        const json_text = ( json_start !== -1 && json_end > json_start )
            ? reply.slice(json_start, json_end + 1)
            : reply;

        const parsed = JSON.parse(json_text);
        return ! parsed.violation;
    }
}
74+
75+
module.exports = {
76+
AsModeration,
77+
};

0 commit comments

Comments
 (0)