Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add forward message detection and spam check #204

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ This option is disabled by default. If set to `true`, the bot will check the mes

This option is disabled by default. If set to `true`, the bot will check the message for the presence of any video or video notes. If the message contains videos but no text, it will be marked as spam.

**Forward check**

This option is disabled by default. If `--meta.forward` is set or `env:META_FORWARD` is `true`, the bot will check whether the message was forwarded. If the message is a forward, it will be marked as spam.

**Multi-language words**

Using words that mix characters from multiple languages is a common spam technique. To detect such messages, the bot can check the message for the presence of such words. This option is disabled by default and can be enabled with the `--multi-lang=, [$MULTI_LANG]` parameter. Setting it to a number above `0` will enable this check, and the bot will mark the message as spam if it contains words with characters from more than one language in more than the specified number of words.
Expand Down Expand Up @@ -277,6 +281,7 @@ meta:
--meta.image-only enable image only check [$META_IMAGE_ONLY]
--meta.links-only enable links only check [$META_LINKS_ONLY]
--meta.video-only enable video only check [$META_VIDEO_ONLY]
--meta.forward enable forward check [$META_FORWARD]

openai:
--openai.token= openai token, disabled if not set [$OPENAI_TOKEN]
Expand Down
1 change: 1 addition & 0 deletions app/bot/bot.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type Message struct {

WithVideo bool `json:",omitempty"`
WithVideoNote bool `json:",omitempty"`
WithForward bool `json:",omitempty"`
}

// Entity represents one special entity in a text message.
Expand Down
3 changes: 3 additions & 0 deletions app/bot/spam.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ func (s *SpamFilter) OnMessage(msg Message, checkOnly bool) (response Response)
if msg.WithVideo || msg.WithVideoNote {
spamReq.Meta.HasVideo = true
}
if msg.WithForward {
spamReq.Meta.HasForward = true
}
spamReq.Meta.Links = strings.Count(msg.Text, "http://") + strings.Count(msg.Text, "https://")
isSpam, checkResults := s.Check(spamReq)
crs := []string{}
Expand Down
2 changes: 2 additions & 0 deletions app/events/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ func transform(msg *tbapi.Message) *bot.Message {
message.WithVideoNote = true
case msg.Story != nil: // telegram story is a sort of video-like thing, mark it as video
message.WithVideo = true
case msg.ForwardOrigin != nil:
message.WithForward = true
}

// fill in the message's reply-to message
Expand Down
56 changes: 56 additions & 0 deletions app/events/events_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,62 @@ func TestTelegramListener_transformTextMessage(t *testing.T) {
},
},
},
{
name: "Message with forward",
input: &tbapi.Message{
Chat: tbapi.Chat{ID: 123456},
From: &tbapi.User{
ID: 100000001,
UserName: "username",
FirstName: "First",
LastName: "Last",
},
MessageID: 30,
Date: 1578627415,
Text: "Forwarded message",
ForwardOrigin: &tbapi.MessageOrigin{Date: time.Unix(1578627415, 0).Unix()},
},
expected: &bot.Message{
ID: 30,
From: bot.User{
ID: 100000001,
Username: "username",
DisplayName: "First Last",
},
Sent: time.Unix(1578627415, 0),
Text: "Forwarded message",
ChatID: 123456,
WithForward: true,
},
},
{
name: "Message with story",
input: &tbapi.Message{
Chat: tbapi.Chat{ID: 123456},
From: &tbapi.User{
ID: 100000001,
UserName: "username",
FirstName: "First",
LastName: "Last",
},
MessageID: 30,
Date: 1578627415,
Text: "Message with story",
Story: &tbapi.Story{},
},
expected: &bot.Message{
ID: 30,
From: bot.User{
ID: 100000001,
Username: "username",
DisplayName: "First Last",
},
Sent: time.Unix(1578627415, 0),
Text: "Message with story",
ChatID: 123456,
WithVideo: true,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
28 changes: 28 additions & 0 deletions app/events/listener_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,34 @@ func TestTelegramListener_DoWithBotBan(t *testing.T) {
assert.Equal(t, "admin", botMock.OnMessageCalls()[0].Msg.From.Username)
assert.False(t, botMock.OnMessageCalls()[0].CheckOnly)
})

t.Run("test spam check for forwarded message", func(t *testing.T) {
mockLogger.ResetCalls()
botMock.ResetCalls()
updMsg := tbapi.Update{
Message: &tbapi.Message{
Chat: tbapi.Chat{ID: 123},
Text: "text 123",
From: &tbapi.User{UserName: "user", ID: 123},
Date: int(time.Date(2020, 2, 11, 19, 35, 55, 9, time.UTC).Unix()),
ForwardOrigin: &tbapi.MessageOrigin{Date: time.Now().Unix()},
},
}

updChan := make(chan tbapi.Update, 1)
updChan <- updMsg
close(updChan)
mockAPI.GetUpdatesChanFunc = func(config tbapi.UpdateConfig) tbapi.UpdatesChannel { return updChan }

err := l.Do(ctx)
assert.EqualError(t, err, "telegram update chan closed")
assert.Equal(t, 1, len(mockLogger.SaveCalls()))
assert.Equal(t, "text 123", mockLogger.SaveCalls()[0].Msg.Text)
assert.True(t, mockLogger.SaveCalls()[0].Msg.WithForward)
require.Equal(t, 1, len(botMock.OnMessageCalls()))
assert.Equal(t, "text 123", botMock.OnMessageCalls()[0].Msg.Text)
assert.True(t, botMock.OnMessageCalls()[0].Msg.WithForward)
})
}

func TestTelegramListener_DoWithBotSoftBan(t *testing.T) {
Expand Down
6 changes: 6 additions & 0 deletions app/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type options struct {
ImageOnly bool `long:"image-only" env:"IMAGE_ONLY" description:"enable image only check"`
LinksOnly bool `long:"links-only" env:"LINKS_ONLY" description:"enable links only check"`
VideosOnly bool `long:"video-only" env:"VIDEO_ONLY" description:"enable video only check"`
Forward bool `long:"forward" env:"FORWARD" description:"enable forward check"`
} `group:"meta" namespace:"meta" env-namespace:"META"`

OpenAI struct {
Expand Down Expand Up @@ -374,6 +375,7 @@ func activateServer(ctx context.Context, opts options, sf *bot.SpamFilter, loc *
MetaLinksOnly: opts.Meta.LinksOnly,
MetaImageOnly: opts.Meta.ImageOnly,
MetaVideoOnly: opts.Meta.VideosOnly,
MetaForwarded: opts.Meta.Forward,
MultiLangLimit: opts.MultiLangWords,
OpenAIEnabled: opts.OpenAI.Token != "" || opts.OpenAI.APIBase != "",
SamplesDataPath: opts.Files.SamplesDataPath,
Expand Down Expand Up @@ -476,6 +478,10 @@ func makeDetector(opts options) *tgspam.Detector {
log.Printf("[INFO] links only check enabled")
metaChecks = append(metaChecks, tgspam.LinkOnlyCheck())
}
if opts.Meta.Forward {
log.Printf("[INFO] forward check enabled")
metaChecks = append(metaChecks, tgspam.ForwardedCheck())
}
detector.WithMetaChecks(metaChecks...)

dynSpamFile := filepath.Join(opts.Files.DynamicDataPath, dynamicSpamFile)
Expand Down
1 change: 1 addition & 0 deletions app/webapi/assets/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ <h2 class="text-center mb-4">Application Settings</h2>
<tr><th>Meta Links Only</th><td>{{.MetaLinksOnly}}</td></tr>
<tr><th>Meta Image Only</th><td>{{.MetaImageOnly}}</td></tr>
<tr><th>Meta Video Only</th><td>{{.MetaVideoOnly}}</td></tr>
<tr><th>Forward Prohibited</th><td>{{.MetaForwarded}}</td></tr>
<tr><th>Multi Lingual Words</th><td>{{.MultiLangLimit}}</td></tr>
<tr><th>OpenAI Enabled</th><td>{{.OpenAIEnabled}}</td></tr>
<tr><th>Samples Data Path</th><td>{{.SamplesDataPath}}</td></tr>
Expand Down
1 change: 1 addition & 0 deletions app/webapi/webapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type Settings struct {
MetaLinksOnly bool `json:"meta_links_only"`
MetaImageOnly bool `json:"meta_image_only"`
MetaVideoOnly bool `json:"meta_video_only"`
MetaForwarded bool `json:"meta_forwarded"`
MultiLangLimit int `json:"multi_lang_limit"`
OpenAIEnabled bool `json:"openai_enabled"`
SamplesDataPath string `json:"samples_data_path"`
Expand Down
7 changes: 4 additions & 3 deletions lib/spamcheck/spamcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ type Request struct {

// MetaData is meta-info about the message, provided by the client.
type MetaData struct {
Images int `json:"images"` // number of images in the message
Links int `json:"links"` // number of links in the message
HasVideo bool `json:"has_video"` // true if the message has a video or video note
Images int `json:"images"` // number of images in the message
Links int `json:"links"` // number of links in the message
HasVideo bool `json:"has_video"` // true if the message has a video or video note
HasForward bool `json:"has_forward"` // true if the message is a forwarded message
}

func (r *Request) String() string {
Expand Down
6 changes: 3 additions & 3 deletions lib/spamcheck/spamcheck_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,17 @@ func TestRequestString(t *testing.T) {
}{
{
name: "Normal message",
request: Request{"Hello, world!", "123", "Alice", MetaData{2, 1, false}, false},
request: Request{"Hello, world!", "123", "Alice", MetaData{2, 1, false, false}, false},
expected: `msg:"Hello, world!", user:"Alice", id:123, images:2, links:1, has_video:false`,
},
{
name: "Spam message",
request: Request{"Spam message", "456", "Bob", MetaData{0, 3, true}, true},
request: Request{"Spam message", "456", "Bob", MetaData{0, 3, true, false}, true},
expected: `msg:"Spam message", user:"Bob", id:456, images:0, links:3, has_video:true`,
},
{
name: "Empty fields",
request: Request{"", "", "", MetaData{0, 0, false}, false},
request: Request{"", "", "", MetaData{0, 0, false, false}, false},
expected: `msg:"", user:"", id:, images:0, links:0, has_video:false`,
},
}
Expand Down
16 changes: 16 additions & 0 deletions lib/tgspam/metachecks.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,19 @@ func VideosCheck() MetaCheck {
return spamcheck.Response{Spam: false, Name: "videos", Details: "no videos without text"}
}
}

// ForwardedCheck builds a MetaCheck that inspects the request's metadata
// and reports the message as spam when it is marked as forwarded.
// The response is always named "forward"; Details says whether the
// message was a forward or not.
func ForwardedCheck() MetaCheck {
	return func(req spamcheck.Request) spamcheck.Response {
		// start from the "clean" verdict and upgrade it only for forwards
		result := spamcheck.Response{Name: "forward", Details: "not forwarded message"}
		if req.Meta.HasForward {
			result.Spam = true
			result.Details = "forwarded message"
		}
		return result
	}
}
Loading
Loading