Skip to content

Commit c4f4195

Browse files
Use regex to match badwords (#24)
* temp * Use Regex to match bad words * Fix typo in test regex * Better default badwords using regex * Fix tests * Fix: Don't match when the badword is in another word * Better typography (also renamed a variable)
1 parent 3aeb251 commit c4f4195

File tree

6 files changed

+126
-29
lines changed

6 files changed

+126
-29
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ This plugin allows you to censor profanity on your Mattermost server. The plugin
2525
### Usage
2626

2727
You can edit the bad words list in **System Console > Plugins > Profanity Filter > Bad Words list**.
28+
In this list, bad words are separated by commas, and you can use Regular Expressions to match them. For example, `bad[[:space:]]?word` will match both `badword` and `bad word`.

plugin.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@
2222
"key": "CensorCharacter",
2323
"display_name": "Censor Character",
2424
"type": "text",
25-
"help_text": "The character(s) to use to censor profanity. Censored words' letters will be replaced with this character. Note that markdown will be interpreted. You can escape markdown character with a backslash. For using * you type \\*.",
25+
"help_text": "The character(s) to use to censor profanity. Censored words' letters will be replaced with this character. Note that markdown will be interpreted. You can escape markdown character with a backslash. For using `*` you type `\\*`.",
2626
"placeholder": "Ex. \\*",
2727
"default": "\\*"
2828
},
2929
{
3030
"key": "BadWordsList",
3131
"display_name": "Bad words list",
3232
"type": "longtext",
33-
"help_text": "The words to censor, separated by spaces",
34-
"default": "4r5e 5h1t 5hit a55 anal anus ar5e arrse arse ass ass-fucker asses assfucker assfukka asshole assholes asswhole a_s_s b!tch b00bs b17ch b1tch ballbag balls ballsack bastard beastial beastiality bellend bestial bestiality bi+ch biatch bitch bitcher bitchers bitches bitchin bitching bloody blow job blowjob blowjobs boiolas bollock bollok boner boob boobs booobs boooobs booooobs booooooobs breasts buceta bugger bum bunny fucker butt butthole buttmuch buttplug c0ck c0cksucker carpet muncher cawk chink cipa cl1t clit clitoris clits cnut cock cock-sucker cockface cockhead cockmunch cockmuncher cocks cocksuck cocksucked cocksucker cocksucking cocksucks cocksuka cocksukka cok cokmuncher coksucka coon cox crap cum cummer cumming cums cumshot cunilingus cunillingus cunnilingus cunt cuntlick cuntlicker cuntlicking cunts cyalis cyberfuc cyberfuck cyberfucked cyberfucker cyberfuckers cyberfucking d1ck damn dick dickhead dildo dildos dink dinks dirsa dlck dog-fucker doggin dogging donkeyribber doosh duche dyke ejaculate ejaculated ejaculates ejaculating ejaculatings ejaculation ejakulate f4nny fag fagging faggitt faggot faggs fagot fagots fags fanny fannyflaps fannyfucker fanyy fatass fcuk fcuker fcuking feck fecker felching fellate fellatio fingerfuck fingerfucked fingerfucker fingerfuckers fingerfucking fingerfucks fistfuck fistfucked fistfucker fistfuckers fistfucking fistfuckings fistfucks flange fook fooker fuck fucka fucked fucker fuckers fuckhead fuckheads fuckin fucking fuckings fuckingshitmotherfucker fuckme fucks fuckwhit fuckwit fudge packer fudgepacker fuk fuker fukker fukkin fuks fukwhit fukwit fux fux0r f_u_c_k gangbang gangbanged gangbangs gaylord gaysex goatse God god-dam god-damned goddamn goddamned hardcoresex hell heshe hoar hoare hoer homo hore horniest horny hotsex jack-off jackoff jap jerk-off jism jiz jizm jizz kawk knob knobead knobed knobend knobhead knobjocky knobjokey kock kondum kondums kum kummer kumming kums kunilingus l3i+ch l3itch labia 
lust lusting m0f0 m0fo m45terbate ma5terb8 ma5terbate masochist master-bate masterb8 masterbat* masterbat3 masterbate masterbation masterbations masturbate mo-fo mof0 mofo mothafuck mothafucka mothafuckas mothafuckaz mothafucked mothafucker mothafuckers mothafuckin mothafucking mothafuckings mothafucks mother fucker motherfuck motherfucked motherfucker motherfuckers motherfuckin motherfucking motherfuckings motherfuckka motherfucks muff mutha muthafecker muthafuckker muther mutherfucker n1gga n1gger nazi nigg3r nigg4h nigga niggah niggas niggaz nigger niggers nob nob jokey nobhead nobjocky nobjokey numbnuts nutsack orgasim orgasims orgasm orgasms p0rn pawn pecker penis penisfucker phonesex phuck phuk phuked phuking phukked phukking phuks phuq pigfucker pimpis piss pissed pisser pissers pisses pissflaps pissin pissing pissoff poop porn porno pornography pornos prick pricks pron pube pusse pussi pussies pussy pussys rectum retard rimjaw rimming s hit s.o.b. sadist schlong screwing scroat scrote scrotum semen sex sh!+ sh!t sh1t shag shagger shaggin shagging shemale shi+ shit shitdick shite shited shitey shitfuck shitfull shithead shiting shitings shits shitted shitter shitters shitting shittings shitty skank slut sluts smegma smut snatch son-of-a-bitch spac spunk s_h_i_t t1tt1e5 t1tties teets teez testical testicle tit titfuck tits titt tittie5 tittiefucker titties tittyfuck tittywank titwank tosser turd tw4t twat twathead twatty twunt twunter v14gra v1gra vagina viagra vulva w00se wang wank wanker wanky whoar whore willies willy xrated xxx"
33+
"help_text": "The words to censor, separated by commas. Accentuation and punctuation insensitive. [Regular expressions](https://en.wikipedia.org/wiki/Regular_expression) are interpreted: if you want to censor characters as `.`, `?`, `*`, `{`, `}`, `[`, `]`, please double-escape them like `\\\\.`",
34+
"default": "4r5e,5h1t,5hit,a55,anal,anus,ar5e,arrse,arse,ass(es)?,ass[-]?fucker,assfukka,assholes?,asswhole,a_s_s,b!tch,b17ch,b1tch,ballbag,ballsack,bastard,beastial,beastiality,bellend,bestial,bestiality,bi+ch,biatch,bitch,bitcher,bitchers,bitches,bitchin,bitching,bloody,blow[ ]?jobs?,boiolas,bollock,bollok,boner,b[o0][o0]+bs?,breasts,buceta,bugger,bum,bunny fucker,butt,butt[ ]?hole,buttmuch,buttplug,c[0o]cks?,c0cksucker,carpet muncher,cawk,chink,cipa,cl[i1]t,clitoris,clits,cnut,cock-sucker,cockface,cockhead,cockmunch,cockmuncher,cocksucks?,cocksucked,cocksucker,cocksucking,cocksuka,cocksukka,cok,cokmuncher,coksucka,coon,cox,crap,cums?,cummer,cumming,cumshot?,cunilingus,cunillingus,cunnilingus,cunt,cuntlick,cuntlicker,cuntlicking,cunts,cyalis,cyberfuc,cyberfuck,cyberfucked,cyberfucker,cyberfuckers,cyberfucking,d1ck,damn,dick,dickhead,dildo,dildos,dink,dinks,dirsa,dlck,dog-fucker,doggin,dogging,donkeyribber,doosh,duche,dyke,ejaculate,ejaculated,ejaculates,ejaculating,ejaculatings,ejaculation,ejakulate,f[[:space:]]*u[[:space:]]*c[[:space:]]*k,f[[:space:]]*u[[:space:]]*c[[:space:]]*k[[:space:]]*e[[:space:]]*r,f4nny,fag,fagging,faggitt,faggot,faggs,fagot,fagots,fags,fanny,fannyflaps,fannyfucker,fanyy,fatass,fcuk,fcuker,fcuking,feck,fecker,felching,fellate,fellatio,fingerfuck,fingerfucked,fingerfucker,fingerfuckers,fingerfucking,fingerfucks,fistfuck,fistfucked,fistfucker,fistfuckers,fistfucking,fistfuckings,fistfucks,flange,fook,fooker,fuck,fucka,fucked,fucker,fuckers,fuckhead,fuckheads,fuckin,fucking,fuckings,fuckingshitmother[[:space:]]*fucker,fuckme,fucks,fuckwhit,fuckwit,fudge 
packer,fudgepacker,fuk,fuker,fukker,fukkin,fuks,fukwhit,fukwit,fux,fux0r,f_u_c_k,gangbang,gangbanged,gangbangs,gaylord,gaysex,goatse,God,god-dam,god-damned,goddamn,goddamned,hardcoresex,hell,heshe,hoar,hoare,hoer,homo,hore,horniest,horny,hotsex,jack-off,jackoff,jap,jerk-off,jism,jiz,jizm,jizz,kawk,knob,knobead,knobed,knobend,knobhead,knobjocky,knobjokey,kock,kondum,kondums,kum,kummer,kumming,kums,kunilingus,l3i\\+ch,l3itch,labia,lust,lusting,m0f0,m0fo,m[a4][s5]terb(at[3e]|8),ma5terbate,masochist,master-bate,masterbations?,mo-fo,mof[o0],motha[[:space:]]*fuck,motha[[:space:]]*fuckas?,motha[[:space:]]*fuckaz,motha[[:space:]]*fucked,motha[[:space:]]*fuckers?,motha[[:space:]]*fuckin,motha[[:space:]]*fucking,motha[[:space:]]*fuckings,motha[[:space:]]*fucks,mother[[:space:]]*fuck,mother[[:space:]]*fucked,mother fucker,mother fuckers,mother fuckin,mother fucking,mother fuckings,mother fuckka,mother fucks,mother[[:space:]]*fucker,mother[[:space:]]*fuckers,mother[[:space:]]*fuckin,mother[[:space:]]*fucking,mother[[:space:]]*fuckings,mother[[:space:]]*fuckka,mother[[:space:]]*fucks,muff,mutha,muthafecker,muthafuckker,muther,mutherfucker,n[i1]gg[ea3]r?s?,niggaz,nob,nob 
jokey,nobhead,nobjocky,nobjokey,numbnuts,nutsack,orgasims?,orgasms?,p[o0]rno?s?,pawn,pecker,penis,penisfucker,phonesex,phuck,phuk,phuked,phuking,phukked,phukking,phuks,phuq,pigfucker,pimpis,piss,pissed,pisser,pissers,pisses,pissflaps,pissin,pissing,pissoff,poop,pornography,prick,pricks,pron,pube,pusse,puss[iy]e?s?,rectum,retard,rimjaw,rimming,s[[:space:]]*h[[:space:]]*i[[:space:]]*t,s\\.o\\.b\\.,sadist,schlong,screwing,scroat,scrote,scrotum,semen,sex,shag,shagger,shaggin,shagging,shemale,sh[i1!][t+]s?,shitdick,shite,shited,shitey,shitfuck,shitfull,shithead,shiting,shitings,shitted,shitter,shitters,shitting,shittings,shitty,skank,sluts?,smegma,smut,snatch,son-of-a-bitch,spac,spunk,t1tt1e5,t1tties,teets,teez,testical,testicle,tits?,titfuck,titt,tittie5,tittiefucker,titties?,tittyfuck,tittywank,titwank,tosser,turd,tw[4a]t,twathead,twatty,twunt,twunter,v14gra,v1gra,vagina,viagra,vulva,w00se,wang,wank,wanker,wanky,whoar,whores?,willies,willy,xrated,x[[:space:]]*x[[:space:]]*x"
3535
}
3636
],
3737
"header": "",
3838
"footer": ""
3939
}
40-
}
40+
}

server/configuration.go

+25-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package main
22

33
import (
4+
"fmt"
45
"reflect"
6+
"regexp"
7+
"sort"
58
"strings"
69

710
"github.com/pkg/errors"
@@ -83,11 +86,29 @@ func (p *Plugin) OnConfigurationChange() error {
8386

8487
p.setConfiguration(configuration)
8588

86-
badWordsFromSettings := strings.Split(configuration.BadWordsList, " ")
87-
p.badWords = make(map[string]bool, len(badWordsFromSettings))
88-
for _, word := range badWordsFromSettings {
89-
p.badWords[strings.ToLower(removeAccents(word))] = true
89+
// Compile the bad words list into a single regular expression
90+
regexString := wordListToRegex(configuration.BadWordsList)
91+
regex, err := regexp.Compile(regexString)
92+
if err != nil {
93+
return err
9094
}
9195

96+
p.badWordsRegex = regex
97+
9298
return nil
9399
}
100+
101+
// wordListToRegex builds the source of a single regular expression that
// matches any entry of wordList, a comma-separated list of bad words (each
// entry may itself be a regular expression).
//
// Entries are trimmed of surrounding whitespace and empty entries are dropped,
// so "abc, def," behaves like "abc,def". The remaining entries are joined as
// alternatives inside `(?mi)\b(...)\b`: case-insensitive, and only matching on
// word boundaries. When no usable entry remains, the returned pattern is one
// that can never match, instead of one that matches the empty string at every
// word boundary.
func wordListToRegex(wordList string) (regexStr string) {
	rawWords := strings.Split(wordList, ",")

	words := make([]string, 0, len(rawWords))
	for _, word := range rawWords {
		// An empty alternative would match the empty string at every word
		// boundary, which flags every message as profane.
		if word = strings.TrimSpace(word); word != "" {
			words = append(words, word)
		}
	}

	if len(words) == 0 {
		// \b and \B cannot both hold at the same position: never matches.
		return `\b\B`
	}

	// Longest first: if "bad" and "bad word" are both in the list, "bad word"
	// must be tried first. The stable sort keeps the configured order for
	// entries of equal length, so the result is deterministic.
	sort.SliceStable(words, func(i, j int) bool { return len(words[i]) > len(words[j]) })

	return fmt.Sprintf(`(?mi)\b(%s)\b`, strings.Join(words, "|"))
}

server/configuration_test.go

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package main
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestWordListToRegex(t *testing.T) {
10+
p := Plugin{
11+
configuration: &configuration{
12+
BadWordsList: "abc,def ghi",
13+
},
14+
}
15+
16+
t.Run("Build Regex", func(t *testing.T) {
17+
regexStr := wordListToRegex(p.getConfiguration().BadWordsList)
18+
19+
assert.Equal(t, regexStr, `(?mi)\b(def ghi|abc)\b`)
20+
})
21+
22+
p2 := Plugin{
23+
configuration: &configuration{
24+
BadWordsList: "abc,abc def",
25+
},
26+
}
27+
28+
t.Run("Build In double Regex", func(t *testing.T) {
29+
regexStr := wordListToRegex(p2.getConfiguration().BadWordsList)
30+
31+
assert.Equal(t, regexStr, `(?mi)\b(abc def|abc)\b`)
32+
})
33+
}

server/plugin.go

+19-16
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package main
22

33
import (
4+
"regexp"
45
"strings"
56
"sync"
67
"unicode"
@@ -23,29 +24,31 @@ type Plugin struct {
2324
// setConfiguration for usage.
2425
configuration *configuration
2526

26-
badWords map[string]bool
27-
}
28-
29-
func (p *Plugin) WordIsBad(word string) bool {
30-
_, ok := p.badWords[strings.ToLower(removeAccents(word))]
31-
return ok
27+
badWordsRegex *regexp.Regexp
3228
}
3329

3430
func (p *Plugin) FilterPost(post *model.Post) (*model.Post, string) {
31+
postMessageWithoutAccents := removeAccents(post.Message)
32+
33+
if !p.badWordsRegex.MatchString(postMessageWithoutAccents) {
34+
return post, ""
35+
}
36+
3537
configuration := p.getConfiguration()
38+
detectedBadWords := p.badWordsRegex.FindAllString(postMessageWithoutAccents, -1)
39+
40+
if configuration.RejectPosts {
41+
return nil, "Profane word not allowed: `" + strings.Join(detectedBadWords, ", ") + "`"
42+
}
3643

37-
message := post.Message
38-
words := strings.Split(message, " ")
39-
for i, word := range words {
40-
if p.WordIsBad(word) {
41-
if configuration.RejectPosts {
42-
return nil, "Profane word not allowed: " + word
43-
}
44-
words[i] = strings.Repeat(configuration.CensorCharacter, len(word))
45-
}
44+
for _, word := range detectedBadWords {
45+
post.Message = strings.ReplaceAll(
46+
post.Message,
47+
word,
48+
strings.Repeat(p.getConfiguration().CensorCharacter, len(word)),
49+
)
4650
}
4751

48-
post.Message = strings.Join(words, " ")
4952
return post, ""
5053
}
5154

server/plugin_test.go

+44-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package main
22

33
import (
4+
"regexp"
45
"testing"
56

67
"github.com/stretchr/testify/assert"
@@ -11,14 +12,13 @@ import (
1112

1213
func TestMessageWillBePosted(t *testing.T) {
1314
p := Plugin{
14-
badWords: map[string]bool{
15-
"abc": true,
16-
},
1715
configuration: &configuration{
1816
CensorCharacter: "*",
1917
RejectPosts: false,
18+
BadWordsList: "def ghi,abc",
2019
},
2120
}
21+
p.badWordsRegex = regexp.MustCompile(wordListToRegex(p.getConfiguration().BadWordsList))
2222

2323
t.Run("word matches", func(t *testing.T) {
2424
in := &model.Post{
@@ -35,10 +35,49 @@ func TestMessageWillBePosted(t *testing.T) {
3535

3636
t.Run("word matches case-insensitive", func(t *testing.T) {
3737
in := &model.Post{
38-
Message: "123 ABC 456",
38+
Message: "123 ABC AbC 456",
3939
}
4040
out := &model.Post{
41-
Message: "123 *** 456",
41+
Message: "123 *** *** 456",
42+
}
43+
44+
rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)
45+
assert.Empty(t, s)
46+
assert.Equal(t, out, rpost)
47+
})
48+
49+
t.Run("word with spaces matches", func(t *testing.T) {
50+
in := &model.Post{
51+
Message: "123 def ghi 456",
52+
}
53+
out := &model.Post{
54+
Message: "123 ******* 456",
55+
}
56+
57+
rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)
58+
assert.Empty(t, s)
59+
assert.Equal(t, out, rpost)
60+
})
61+
62+
t.Run("word matches with punctuation", func(t *testing.T) {
63+
in := &model.Post{
64+
Message: "123 abc, 456",
65+
}
66+
out := &model.Post{
67+
Message: "123 ***, 456",
68+
}
69+
70+
rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)
71+
assert.Empty(t, s)
72+
assert.Equal(t, out, rpost)
73+
})
74+
75+
t.Run("word shouldn't match because it in another word", func(t *testing.T) {
76+
in := &model.Post{
77+
Message: "helloabcworld helloabc abchello",
78+
}
79+
out := &model.Post{
80+
Message: "helloabcworld helloabc abchello",
4281
}
4382

4483
rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)

0 commit comments

Comments
 (0)