Skip to content

Use regex to match badwords #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 6, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ This plugin allows you to censor profanity on your Mattermost server. The plugin
### Usage

You can edit the bad words list in **System Console > Plugins > Profanity Filter > Bad Words list**.
In this list, you can use Regular Expressions to match bad words. For example, `bad[[:space:]]?word` will match both `badword` and `bad word`.
8 changes: 4 additions & 4 deletions plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@
"key": "CensorCharacter",
"display_name": "Censor Character",
"type": "text",
"help_text": "The character(s) to use to censor profanity. Censored words' letters will be replaced with this character. Note that markdown will be interpreted. You can escape markdown character with a backslash. For using * you type \\*.",
"help_text": "The character(s) to use to censor profanity. Censored words' letters will be replaced with this character. Note that markdown will be interpreted. You can escape markdown character with a backslash. For using `*` you type `\\*`.",
"placeholder": "Ex. \\*",
"default": "\\*"
},
{
"key": "BadWordsList",
"display_name": "Bad words list",
"type": "longtext",
"help_text": "The words to censor, separated by spaces",
"default": "4r5e 5h1t 5hit a55 anal anus ar5e arrse arse ass ass-fucker asses assfucker assfukka asshole assholes asswhole a_s_s b!tch b00bs b17ch b1tch ballbag balls ballsack bastard beastial beastiality bellend bestial bestiality bi+ch biatch bitch bitcher bitchers bitches bitchin bitching bloody blow job blowjob blowjobs boiolas bollock bollok boner boob boobs booobs boooobs booooobs booooooobs breasts buceta bugger bum bunny fucker butt butthole buttmuch buttplug c0ck c0cksucker carpet muncher cawk chink cipa cl1t clit clitoris clits cnut cock cock-sucker cockface cockhead cockmunch cockmuncher cocks cocksuck cocksucked cocksucker cocksucking cocksucks cocksuka cocksukka cok cokmuncher coksucka coon cox crap cum cummer cumming cums cumshot cunilingus cunillingus cunnilingus cunt cuntlick cuntlicker cuntlicking cunts cyalis cyberfuc cyberfuck cyberfucked cyberfucker cyberfuckers cyberfucking d1ck damn dick dickhead dildo dildos dink dinks dirsa dlck dog-fucker doggin dogging donkeyribber doosh duche dyke ejaculate ejaculated ejaculates ejaculating ejaculatings ejaculation ejakulate f4nny fag fagging faggitt faggot faggs fagot fagots fags fanny fannyflaps fannyfucker fanyy fatass fcuk fcuker fcuking feck fecker felching fellate fellatio fingerfuck fingerfucked fingerfucker fingerfuckers fingerfucking fingerfucks fistfuck fistfucked fistfucker fistfuckers fistfucking fistfuckings fistfucks flange fook fooker fuck fucka fucked fucker fuckers fuckhead fuckheads fuckin fucking fuckings fuckingshitmotherfucker fuckme fucks fuckwhit fuckwit fudge packer fudgepacker fuk fuker fukker fukkin fuks fukwhit fukwit fux fux0r f_u_c_k gangbang gangbanged gangbangs gaylord gaysex goatse God god-dam god-damned goddamn goddamned hardcoresex hell heshe hoar hoare hoer homo hore horniest horny hotsex jack-off jackoff jap jerk-off jism jiz jizm jizz kawk knob knobead knobed knobend knobhead knobjocky knobjokey kock kondum kondums kum kummer kumming kums kunilingus l3i+ch l3itch labia 
lust lusting m0f0 m0fo m45terbate ma5terb8 ma5terbate masochist master-bate masterb8 masterbat* masterbat3 masterbate masterbation masterbations masturbate mo-fo mof0 mofo mothafuck mothafucka mothafuckas mothafuckaz mothafucked mothafucker mothafuckers mothafuckin mothafucking mothafuckings mothafucks mother fucker motherfuck motherfucked motherfucker motherfuckers motherfuckin motherfucking motherfuckings motherfuckka motherfucks muff mutha muthafecker muthafuckker muther mutherfucker n1gga n1gger nazi nigg3r nigg4h nigga niggah niggas niggaz nigger niggers nob nob jokey nobhead nobjocky nobjokey numbnuts nutsack orgasim orgasims orgasm orgasms p0rn pawn pecker penis penisfucker phonesex phuck phuk phuked phuking phukked phukking phuks phuq pigfucker pimpis piss pissed pisser pissers pisses pissflaps pissin pissing pissoff poop porn porno pornography pornos prick pricks pron pube pusse pussi pussies pussy pussys rectum retard rimjaw rimming s hit s.o.b. sadist schlong screwing scroat scrote scrotum semen sex sh!+ sh!t sh1t shag shagger shaggin shagging shemale shi+ shit shitdick shite shited shitey shitfuck shitfull shithead shiting shitings shits shitted shitter shitters shitting shittings shitty skank slut sluts smegma smut snatch son-of-a-bitch spac spunk s_h_i_t t1tt1e5 t1tties teets teez testical testicle tit titfuck tits titt tittie5 tittiefucker titties tittyfuck tittywank titwank tosser turd tw4t twat twathead twatty twunt twunter v14gra v1gra vagina viagra vulva w00se wang wank wanker wanky whoar whore willies willy xrated xxx"
"help_text": "The words to censor, separated by commas. Accentuation and punctuation insensitive. [Regular expressions](https://en.wikipedia.org/wiki/Regular_expression) are interpreted: if you want to censor characters as `.`, `?`, `*`, `{`, `}`, `[`, `]`, please double-escape them like `\\\\.`",
"default": "4r5e,5h1t,5hit,a55,anal,anus,ar5e,arrse,arse,ass(es)?,ass[-]?fucker,assfukka,assholes?,asswhole,a_s_s,b!tch,b17ch,b1tch,ballbag,ballsack,bastard,beastial,beastiality,bellend,bestial,bestiality,bi+ch,biatch,bitch,bitcher,bitchers,bitches,bitchin,bitching,bloody,blow[ ]?jobs?,boiolas,bollock,bollok,boner,b[o0][o0]+bs?,breasts,buceta,bugger,bum,bunny fucker,butt,butt[ ]?hole,buttmuch,buttplug,c[0o]cks?,c0cksucker,carpet muncher,cawk,chink,cipa,cl[i1]t,clitoris,clits,cnut,cock-sucker,cockface,cockhead,cockmunch,cockmuncher,cocksucks?,cocksucked,cocksucker,cocksucking,cocksuka,cocksukka,cok,cokmuncher,coksucka,coon,cox,crap,cums?,cummer,cumming,cumshot?,cunilingus,cunillingus,cunnilingus,cunt,cuntlick,cuntlicker,cuntlicking,cunts,cyalis,cyberfuc,cyberfuck,cyberfucked,cyberfucker,cyberfuckers,cyberfucking,d1ck,damn,dick,dickhead,dildo,dildos,dink,dinks,dirsa,dlck,dog-fucker,doggin,dogging,donkeyribber,doosh,duche,dyke,ejaculate,ejaculated,ejaculates,ejaculating,ejaculatings,ejaculation,ejakulate,f[[:space:]]*u[[:space:]]*c[[:space:]]*k,f[[:space:]]*u[[:space:]]*c[[:space:]]*k[[:space:]]*e[[:space:]]*r,f4nny,fag,fagging,faggitt,faggot,faggs,fagot,fagots,fags,fanny,fannyflaps,fannyfucker,fanyy,fatass,fcuk,fcuker,fcuking,feck,fecker,felching,fellate,fellatio,fingerfuck,fingerfucked,fingerfucker,fingerfuckers,fingerfucking,fingerfucks,fistfuck,fistfucked,fistfucker,fistfuckers,fistfucking,fistfuckings,fistfucks,flange,fook,fooker,fuck,fucka,fucked,fucker,fuckers,fuckhead,fuckheads,fuckin,fucking,fuckings,fuckingshitmother[[:space:]]*fucker,fuckme,fucks,fuckwhit,fuckwit,fudge 
packer,fudgepacker,fuk,fuker,fukker,fukkin,fuks,fukwhit,fukwit,fux,fux0r,f_u_c_k,gangbang,gangbanged,gangbangs,gaylord,gaysex,goatse,God,god-dam,god-damned,goddamn,goddamned,hardcoresex,hell,heshe,hoar,hoare,hoer,homo,hore,horniest,horny,hotsex,jack-off,jackoff,jap,jerk-off,jism,jiz,jizm,jizz,kawk,knob,knobead,knobed,knobend,knobhead,knobjocky,knobjokey,kock,kondum,kondums,kum,kummer,kumming,kums,kunilingus,l3i\\+ch,l3itch,labia,lust,lusting,m0f0,m0fo,m[a4][s5]terb(at[3e]|8),ma5terbate,masochist,master-bate,masterbations?,mo-fo,mof[o0],motha[[:space:]]*fuck,motha[[:space:]]*fuckas?,motha[[:space:]]*fuckaz,motha[[:space:]]*fucked,motha[[:space:]]*fuckers?,motha[[:space:]]*fuckin,motha[[:space:]]*fucking,motha[[:space:]]*fuckings,motha[[:space:]]*fucks,mother[[:space:]]*fuck,mother[[:space:]]*fucked,mother fucker,mother fuckers,mother fuckin,mother fucking,mother fuckings,mother fuckka,mother fucks,mother[[:space:]]*fucker,mother[[:space:]]*fuckers,mother[[:space:]]*fuckin,mother[[:space:]]*fucking,mother[[:space:]]*fuckings,mother[[:space:]]*fuckka,mother[[:space:]]*fucks,muff,mutha,muthafecker,muthafuckker,muther,mutherfucker,n[i1]gg[ea3]r?s?,niggaz,nob,nob 
jokey,nobhead,nobjocky,nobjokey,numbnuts,nutsack,orgasims?,orgasms?,p[o0]rno?s?,pawn,pecker,penis,penisfucker,phonesex,phuck,phuk,phuked,phuking,phukked,phukking,phuks,phuq,pigfucker,pimpis,piss,pissed,pisser,pissers,pisses,pissflaps,pissin,pissing,pissoff,poop,pornography,prick,pricks,pron,pube,pusse,puss[iy]e?s?,rectum,retard,rimjaw,rimming,s[[:space:]]*h[[:space:]]*i[[:space:]]*t,s\\.o\\.b\\.,sadist,schlong,screwing,scroat,scrote,scrotum,semen,sex,shag,shagger,shaggin,shagging,shemale,sh[i1!][t+]s?,shitdick,shite,shited,shitey,shitfuck,shitfull,shithead,shiting,shitings,shitted,shitter,shitters,shitting,shittings,shitty,skank,sluts?,smegma,smut,snatch,son-of-a-bitch,spac,spunk,t1tt1e5,t1tties,teets,teez,testical,testicle,tits?,titfuck,titt,tittie5,tittiefucker,titties?,tittyfuck,tittywank,titwank,tosser,turd,tw[4a]t,twathead,twatty,twunt,twunter,v14gra,v1gra,vagina,viagra,vulva,w00se,wang,wank,wanker,wanky,whoar,whores?,willies,willy,xrated,x[[:space:]]*x[[:space:]]*x"
}
],
"header": "",
"footer": ""
}
}
}
29 changes: 25 additions & 4 deletions server/configuration.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package main

import (
"fmt"
"reflect"
"regexp"
"sort"
"strings"

"github.com/pkg/errors"
Expand Down Expand Up @@ -83,11 +86,29 @@ func (p *Plugin) OnConfigurationChange() error {

p.setConfiguration(configuration)

badWordsFromSettings := strings.Split(configuration.BadWordsList, " ")
p.badWords = make(map[string]bool, len(badWordsFromSettings))
for _, word := range badWordsFromSettings {
p.badWords[strings.ToLower(removeAccents(word))] = true
// Build a single regular expression from the bad words list
regexString := wordListToRegex(configuration.BadWordsList)
regex, err := regexp.Compile(regexString)
if err != nil {
return err
}

p.badWordsRegex = regex

return nil
}

// wordListToRegex builds the source of a regular expression that matches any
// entry of wordList as a whole word.
//
// wordList is a comma-separated list whose entries are inserted verbatim into
// the pattern, so each entry is itself a regular expression fragment.
// Surrounding whitespace is trimmed from every entry, and empty entries (from
// an empty list, a trailing comma or a doubled comma) are dropped: an empty
// alternative would let the expression match the empty string at every word
// boundary, so any message containing a word would be reported as a match.
//
// The returned pattern carries the (?mi) flags (multi-line, case-insensitive)
// and wraps the alternation in \b word boundaries. When no usable entry
// remains, a pattern that can never match is returned instead.
func wordListToRegex(wordList string) string {
	entries := strings.Split(wordList, ",")
	words := make([]string, 0, len(entries))
	for _, entry := range entries {
		if word := strings.TrimSpace(entry); word != "" {
			words = append(words, word)
		}
	}

	if len(words) == 0 {
		// \b and \B are mutually exclusive at any position, so this
		// pattern compiles but never matches anything.
		return `(?mi)\b\B`
	}

	// Longest entries first: alternation picks the first alternative that
	// matches, so if "bad" and "bad word" are both in the list we want
	// "bad word" to win. The stable sort keeps the configured order for
	// entries of equal length.
	sort.SliceStable(words, func(i, j int) bool { return len(words[i]) > len(words[j]) })

	return fmt.Sprintf(`(?mi)\b(%s)\b`, strings.Join(words, "|"))
}
33 changes: 33 additions & 0 deletions server/configuration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package main

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestWordListToRegex checks that wordListToRegex turns a comma-separated
// bad words list into a single alternation, with longer entries placed before
// shorter ones so that multi-word entries are matched first.
func TestWordListToRegex(t *testing.T) {
	tests := []struct {
		name         string
		badWordsList string
		want         string
	}{
		{
			name:         "Build Regex",
			badWordsList: "abc,def ghi",
			want:         `(?mi)\b(def ghi|abc)\b`,
		},
		{
			name:         "Build In double Regex",
			badWordsList: "abc,abc def",
			want:         `(?mi)\b(abc def|abc)\b`,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Read the list back through the plugin configuration, as the
			// plugin itself does when the configuration changes.
			p := Plugin{
				configuration: &configuration{
					BadWordsList: tc.badWordsList,
				},
			}

			regexStr := wordListToRegex(p.getConfiguration().BadWordsList)

			// assert.Equal takes (t, expected, actual); keeping that order
			// makes the failure output label the two values correctly.
			assert.Equal(t, tc.want, regexStr)
		})
	}
}
36 changes: 19 additions & 17 deletions server/plugin.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"regexp"
"strings"
"sync"
"unicode"
Expand All @@ -23,29 +24,30 @@ type Plugin struct {
// setConfiguration for usage.
configuration *configuration

badWords map[string]bool
}

func (p *Plugin) WordIsBad(word string) bool {
_, ok := p.badWords[strings.ToLower(removeAccents(word))]
return ok
badWordsRegex *regexp.Regexp
}

func (p *Plugin) FilterPost(post *model.Post) (*model.Post, string) {
configuration := p.getConfiguration()

message := post.Message
words := strings.Split(message, " ")
for i, word := range words {
if p.WordIsBad(word) {
if configuration.RejectPosts {
return nil, "Profane word not allowed: " + word
}
words[i] = strings.Repeat(configuration.CensorCharacter, len(word))
postWithoutAccents := removeAccents(post.Message)

if p.badWordsRegex.MatchString(postWithoutAccents) {
configuration := p.getConfiguration()

detectedBadWords := p.badWordsRegex.FindAllString(postWithoutAccents, -1)

if configuration.RejectPosts {
return nil, "Profane word not allowed: `" + strings.Join(detectedBadWords, ", ") + "`"
}

for _, word := range detectedBadWords {
post.Message = strings.ReplaceAll(
post.Message,
word,
strings.Repeat(p.getConfiguration().CensorCharacter, len(word)),
)
}
}

post.Message = strings.Join(words, " ")
return post, ""
}

Expand Down
49 changes: 44 additions & 5 deletions server/plugin_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"regexp"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -11,14 +12,13 @@ import (

func TestMessageWillBePosted(t *testing.T) {
p := Plugin{
badWords: map[string]bool{
"abc": true,
},
configuration: &configuration{
CensorCharacter: "*",
RejectPosts: false,
BadWordsList: "def ghi,abc",
},
}
p.badWordsRegex = regexp.MustCompile(wordListToRegex(p.getConfiguration().BadWordsList))

t.Run("word matches", func(t *testing.T) {
in := &model.Post{
Expand All @@ -35,10 +35,49 @@ func TestMessageWillBePosted(t *testing.T) {

t.Run("word matches case-insensitive", func(t *testing.T) {
in := &model.Post{
Message: "123 ABC 456",
Message: "123 ABC AbC 456",
}
out := &model.Post{
Message: "123 *** 456",
Message: "123 *** *** 456",
}

rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)
assert.Empty(t, s)
assert.Equal(t, out, rpost)
})

t.Run("word with spaces matches", func(t *testing.T) {
in := &model.Post{
Message: "123 def ghi 456",
}
out := &model.Post{
Message: "123 ******* 456",
}

rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)
assert.Empty(t, s)
assert.Equal(t, out, rpost)
})

t.Run("word matches with punctuation", func(t *testing.T) {
in := &model.Post{
Message: "123 abc, 456",
}
out := &model.Post{
Message: "123 ***, 456",
}

rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)
assert.Empty(t, s)
assert.Equal(t, out, rpost)
})

t.Run("word shouldn't match because it in another word", func(t *testing.T) {
in := &model.Post{
Message: "helloabcworld helloabc abchello",
}
out := &model.Post{
Message: "helloabcworld helloabc abchello",
}

rpost, s := p.MessageWillBePosted(&plugin.Context{}, in)
Expand Down