Better detection system; Detection types

This commit is contained in:
0xf8 2023-04-16 20:43:36 -04:00
parent f7c7526081
commit d94095e8d9
Signed by: 0xf8
GPG Key ID: 446580D758689584
9 changed files with 245 additions and 264 deletions

9
Cargo.lock generated
View File

@ -2067,7 +2067,7 @@ checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
[[package]]
name = "scam-police"
version = "0.5.0"
version = "0.6.0"
dependencies = [
"anyhow",
"dirs",
@ -2078,6 +2078,7 @@ dependencies = [
"rpassword",
"serde",
"serde_json",
"strfmt",
"tokio",
"url",
]
@ -2252,6 +2253,12 @@ dependencies = [
"der",
]
[[package]]
name = "strfmt"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a8348af2d9fc3258c8733b8d9d8db2e56f54b2363a4b5b81585c7875ed65e65"
[[package]]
name = "strsim"
version = "0.10.0"

View File

@ -1,6 +1,6 @@
[package]
name = "scam-police"
version = "0.5.0"
version = "0.6.0"
edition = "2021"
authors = [ "@0xf8:projectsegfau.lt", "@jjj333:pain.agency" ]
@ -16,5 +16,6 @@ reqwest = "0.11.16"
rpassword = "7.2.0"
serde = "1.0.160"
serde_json = "1.0.95"
strfmt = "0.2.4"
tokio = { version = "1.27.0", features = ["macros", "rt-multi-thread"] }
url = "2.3.1"

View File

@ -1,66 +1,52 @@
{
"keywords":{
"verbs":[
"earn",
"make", "making", "made",
"generate",
"win",
"invest",
"cashout",
"sell",
"get",
"pay",
"sections": {
"18+": {
"threshold": 3,
"requiredKeywords": [
"sex",
"meet",
"upload",
"login",
"send",
"join",
"buy",
"check",
"private"
],
"currencies":[
"$", "£", "€",
"money",
"million",
"dollar", "pound", "euro",
"crypto",
"paypal",
"bitcoin", "btc",
"etherium", " eth",
"usd",
"nft",
"token",
"free",
"gift",
"card",
"nude",
"18+",
"pay"
"private",
"pictures",
"cheap"
],
"socials":[
"l.wl.co/",
".app.link/",
"bit.ly/",
"paypal.me/",
"matrix.to/",
"wa.me/",
"t.me/",
"keywords": [
"pay", "pal",
"buy",
"sell",
"message",
"meet",
"check",
"card"
]
},
"Investment": {
"threshold": 4,
"requiredKeywords": [
"tg", "t.me/",
"invest",
"crypto", "market",
"profit",
"my commission",
"cashout", "cash out",
"million",
"cash.app",
"cash app",
"cashapp",
"discord.gg/",
"discord",
"is.gd/",
"telegram",
"whatsapp", "whatapp", "whats app", "what app",
"wickr",
"kik",
"instagram",
"dm me",
"👇", "👆️",
"+1", "+2"
"l.wl.co/", ".app.link/"
],
"keywords": [
"earn", "earning",
"make", "making", "made",
"buy",
"send",
"interested",
"btc", "bitcoin",
"eth", "ethereum", "etherium",
"$", "usd",
"asking me how",
"cash", "whats", "app",
"tele", "gram",
"👇", "👆️"
]
}
}
}

View File

@ -3,16 +3,16 @@
"Ok": null,
"MaybeScam": null,
"LikelyScam": {
"plain": "Watch out, the message you replied to has been detected as a scam! Please don't do anything they ask you to do! Stay safe",
"html": "Watch out, the message you replied to has been detected as a <b>scam</b>! <u>Please don't do anything they ask you to do</u>! Stay safe"
"plain": "Watch out, the message you replied to has been detected as an {scam} scam! Please don't do anything they ask you to do! Stay safe",
"html": "Watch out, the message you replied to has been detected as an <b>{scam} scam</b>! <u>Please don't do anything they ask you to do</u>! Stay safe"
}
},
"message": {
"Ok": null,
"MaybeScam": null,
"LikelyScam": {
"plain": "Warning! This message is likely to be a scam, seeking to lure you in and steal your money! Please visit these resources for more information:\n- https://www.sec.gov/oiea/investor-alerts-and-bulletins/digital-asset-and-crypto-investment-scams-investor-alert\n- https://www.youtube.com/watch?v=gFWaA7mt9oM \n [!mods !modhelp]",
"html": "Warning! This message is likely to be a <b>scam</b>, seeking to lure you in and steal your money! Please visit these resources for more information: <ul><li><a href=\"https://www.sec.gov/oiea/investor-alerts-and-bulletins/digital-asset-and-crypto-investment-scams-investor-alert\">https://www.sec.gov/oiea/investor-alerts-and-bulletins/digital-asset-and-crypto-investment-scams-investor-alert</a></li><li><a href=\"https://www.youtube.com/watch?v=gFWaA7mt9oM\">https://www.youtube.com/watch?v=gFWaA7mt9oM</a></li></ul> [!mods !modhelp]"
"plain": "Warning! This message was detected as an {scam} scam, seeking to lure you in and steal your money! Please visit these resources for more information:\n- https://www.sec.gov/oiea/investor-alerts-and-bulletins/digital-asset-and-crypto-investment-scams-investor-alert\n- https://www.youtube.com/watch?v=gFWaA7mt9oM \n [!mods !modhelp]",
"html": "Warning! This message was detected as an <b>{scam} scam</b>, seeking to lure you in and steal your money! Please visit these resources for more information: <ul><li><a href=\"https://www.sec.gov/oiea/investor-alerts-and-bulletins/digital-asset-and-crypto-investment-scams-investor-alert\">https://www.sec.gov/oiea/investor-alerts-and-bulletins/digital-asset-and-crypto-investment-scams-investor-alert</a></li><li><a href=\"https://www.youtube.com/watch?v=gFWaA7mt9oM\">https://www.youtube.com/watch?v=gFWaA7mt9oM</a></li></ul> [!mods !modhelp]"
}
}
}

View File

@ -1,9 +1,11 @@
use serde_json::Value;
use crate::keywords::KeywordSection;
use matrix_sdk::{room::Joined, ruma::OwnedRoomId};
use serde::{Deserialize, Serialize};
use std::collections::BTreeSet;
use matrix_sdk::{ruma::OwnedRoomId, room::Joined};
use serde_json::Value;
use std::collections::{BTreeMap, BTreeSet};
use tokio::fs;
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct RoomConfig {
#[serde(skip_serializing_if = "Option::is_none")]
@ -16,7 +18,11 @@ pub struct RoomConfig {
impl Default for RoomConfig {
fn default() -> Self {
Self { id: None, no_reply: false, reply_to_scam: false }
Self {
id: None,
no_reply: false,
reply_to_scam: false,
}
}
}
@ -30,7 +36,9 @@ impl RoomConfigController {
pub async fn find(&self, id: OwnedRoomId) -> Option<&RoomConfig> {
let id = Some(id);
for room in self.rooms.to_owned() {
if room.id == id { return Some(self.rooms.get(&room).unwrap()) }
if room.id == id {
return Some(self.rooms.get(&room).unwrap());
}
}
None
@ -45,7 +53,11 @@ impl RoomConfigController {
Ok(self.rooms.insert(config))
}
pub async fn find_or_create(&mut self, room: &Joined, id: OwnedRoomId) -> anyhow::Result<&RoomConfig> {
pub async fn find_or_create(
&mut self,
room: &Joined,
id: OwnedRoomId,
) -> anyhow::Result<&RoomConfig> {
if self.find(id.to_owned()).await.is_none() {
self.create_config(room).await?;
}
@ -54,7 +66,12 @@ impl RoomConfigController {
}
pub async fn save(&mut self, config: &RoomConfig) -> anyhow::Result<()> {
self.rooms.take(self.to_owned().find(config.id.to_owned().unwrap()).await.unwrap());
self.rooms.take(
self.to_owned()
.find(config.id.to_owned().unwrap())
.await
.unwrap(),
);
self.rooms.insert(config.to_owned());
let serialized = serde_json::to_string_pretty(self)?;
@ -65,13 +82,18 @@ impl RoomConfigController {
pub fn restore() -> anyhow::Result<Self> {
if !std::path::Path::new(crate::ROOMS_CONFIG_FILE.to_owned().to_str().unwrap()).exists() {
return Ok(Self { rooms: BTreeSet::<RoomConfig>::new() });
return Ok(Self {
rooms: BTreeSet::<RoomConfig>::new(),
});
}
let serialized = std::fs::read_to_string(crate::ROOMS_CONFIG_FILE.to_owned())?;
let ctrl: Result<RoomConfigController, serde_json::Error> = serde_json::from_str(&serialized);
let ctrl: Result<RoomConfigController, serde_json::Error> =
serde_json::from_str(&serialized);
if ctrl.is_err() {
return Ok(Self { rooms: BTreeSet::<RoomConfig>::new() });
return Ok(Self {
rooms: BTreeSet::<RoomConfig>::new(),
});
}
Ok(ctrl.unwrap())
}
@ -80,24 +102,31 @@ impl RoomConfigController {
#[derive(Debug)]
pub struct Config {
pub keywords: Value,
pub keywords: BTreeMap<String, KeywordSection>,
pub responses: Value,
}
impl Config {
pub fn load() -> Config {
let keywords_reader =
std::fs::File::open("config/keywords.json").expect("Couldn't find keywords.json");
let keywords: Value =
serde_json::from_reader(keywords_reader).expect("Couldn't read keywords");
let responses: Value = serde_json::from_reader(
std::fs::File::open("config/responses.json").expect("Couldn't find responses.json"),
)
.expect("Couldn't read responses");
let responses_reader =
std::fs::File::open("config/responses.json").expect("Couldn't find responses.json");
let responses: Value =
serde_json::from_reader(responses_reader).expect("Couldn't read responses");
let keywords: Value = serde_json::from_reader(
std::fs::File::open("config/keywords.json").expect("Couldn't find keywords.json"),
)
.expect("Couldn't read keywords");
let sections: BTreeMap<String, KeywordSection> = keywords["sections"]
.as_object()
.unwrap()
.into_iter()
.map(|a| (a.0.to_owned(), KeywordSection::load(a.1)))
.collect();
Self {
keywords,
keywords: sections,
responses,
}
}

50
src/debug.rs Normal file
View File

@ -0,0 +1,50 @@
use matrix_sdk::{
room::Joined,
ruma::events::room::message::RoomMessageEventContent,
};
use crate::{
judge::{Judgement, JudgementResult},
keywords::KeywordSection,
CONFIG,
};
/// Field-less type that groups the debug-report helpers as associated functions.
pub struct Debug {}
impl Debug {
    /// Sends a per-section keyword report for `judge`'s text to `room`.
    ///
    /// Sections of `CONFIG.keywords` are evaluated in the map's iteration
    /// order. Evaluation stops at the first section whose hit count reaches its
    /// threshold, so the report lists every section up to and including the one
    /// that decided the verdict. The verdict is appended as the final line:
    /// `LikelyScam` when a required keyword was hit, `MaybeScam` when only the
    /// threshold was reached, `Ok` when no section reached its threshold.
    ///
    /// # Errors
    ///
    /// Returns an error if the report could not be sent to the room.
    pub async fn send_debug(judge: &Judgement, room: &Joined) -> anyhow::Result<()> {
        let mut plain = String::new();
        let mut html = String::new();
        let mut verdict: (JudgementResult, Option<(&str, &KeywordSection)>) =
            (JudgementResult::Ok, None);

        // Borrow the configured sections instead of cloning the whole map.
        for (name, section) in CONFIG.keywords.iter() {
            // `find` takes its haystack by value, hence one copy per section.
            let (hits, hit_required) = section.find(judge.text.to_owned());

            plain.push_str(&format!("\"{name}\": \"{hits}\", {hit_required} \n"));
            html.push_str(&format!(
                "<code>{name}</code>: <code>{hits}</code>, <code>{hit_required}</code><br>"
            ));

            if hits >= section.threshold {
                let result = if hit_required {
                    JudgementResult::LikelyScam
                } else {
                    JudgementResult::MaybeScam
                };
                verdict = (result, Some((name.as_str(), section)));
                break;
            }
        }

        // Report the verdict that was previously computed and then discarded.
        let (result, matched) = verdict;
        match matched {
            Some((name, section)) => {
                let threshold = section.threshold;
                plain.push_str(&format!(
                    "Verdict: {result:?} (section \"{name}\", threshold {threshold})\n"
                ));
                html.push_str(&format!(
                    "Verdict: <code>{result:?}</code> (section <code>{name}</code>, threshold <code>{threshold}</code>)<br>"
                ));
            }
            None => {
                plain.push_str(&format!("Verdict: {result:?}\n"));
                html.push_str(&format!("Verdict: <code>{result:?}</code><br>"));
            }
        }

        let msg = RoomMessageEventContent::text_html(plain, html);
        // Surface send failures to the caller instead of panicking.
        room.send(msg, None)
            .await
            .map_err(|e| anyhow::anyhow!("Couldn't send message: {e}"))?;

        Ok(())
    }
}

View File

@ -1,5 +1,5 @@
use crate::{
keywords::{KeywordCategory, Keywords},
keywords::KeywordSection,
config::RoomConfig,
CONFIG,
};
@ -7,9 +7,11 @@ use matrix_sdk::{
room::Joined,
ruma::events::room::message::{OriginalRoomMessageEvent, RoomMessageEventContent},
};
use std::collections::HashMap;
use strfmt::strfmt;
use serde_json::json;
#[derive(Debug)]
#[derive(Debug, PartialEq, Eq)]
pub enum JudgementResult {
Ok,
MaybeScam, // hit atleast one category
@ -31,93 +33,27 @@ pub struct Judgement {
}
impl Judgement {
pub fn judge(&self, config: &RoomConfig) -> anyhow::Result<JudgementResult> {
// Load keywords
let mut keywords = CONFIG.keywords.clone();
let keywords = keywords
.as_object_mut()
.unwrap()
.get_mut("keywords")
.unwrap();
pub fn judge(&self, _config: &RoomConfig) -> anyhow::Result<(JudgementResult, Option<(String, KeywordSection)>)> {
let sections = CONFIG.keywords.to_owned();
// Turn json into Keywords
let verbs = Keywords::create("verbs", &keywords["verbs"]);
let currencies = Keywords::create("currencies", &keywords["currencies"]);
let socials = Keywords::create("socials", &keywords["socials"]);
for section in sections {
let (hits, hit_required) = section.1.find(self.text.to_owned());
// Count occurences
let mut counter = KeywordCategory::create_counter_map();
counter.insert(KeywordCategory::Verb, verbs.find(&self.text));
counter.insert(KeywordCategory::Currency, currencies.find(&self.text));
counter.insert(KeywordCategory::Social, socials.find(&self.text));
let mut count_all = 0;
let total = counter.len();
for (_category, count) in counter.to_owned() {
if count > 0 {
count_all = count_all + 1;
if hit_required && hits >= section.1.threshold {
return Ok((JudgementResult::LikelyScam, Some(section)))
} else if hits >= section.1.threshold {
return Ok((JudgementResult::MaybeScam, Some(section)))
}
}
if count_all == 0 {
return Ok(JudgementResult::Ok);
};
if count_all < total {
return Ok(JudgementResult::MaybeScam);
};
Ok(JudgementResult::LikelyScam)
}
pub async fn send_debug(&self, config: &RoomConfig, room: &Joined) -> anyhow::Result<()> {
// Load keywords
let mut keywords = CONFIG.keywords.clone();
let keywords = keywords
.as_object_mut()
.unwrap()
.get_mut("keywords")
.unwrap();
// Turn json into Keywords
let verbs = Keywords::create("verbs", &keywords["verbs"]);
let currencies = Keywords::create("currencies", &keywords["currencies"]);
let socials = Keywords::create("socials", &keywords["socials"]);
// Count occurences
let mut counter = KeywordCategory::create_counter_map();
counter.insert(KeywordCategory::Verb, verbs.find(&self.text));
counter.insert(KeywordCategory::Currency, currencies.find(&self.text));
counter.insert(KeywordCategory::Social, socials.find(&self.text));
let mut count_all = 0;
let total = counter.len();
for (_category, count) in counter.to_owned() {
if count > 0 {
count_all = count_all + 1;
}
}
let mut result = JudgementResult::LikelyScam;
if count_all < total {
result = JudgementResult::MaybeScam
}
if count_all == 0 {
result = JudgementResult::Ok
}
// Send message
let msg = RoomMessageEventContent::text_html(
format!("{counter:?}\nCategories covered: {count_all}/{total}\nVerdict: {result:?}\n{config:?}"),
format!("<code>{counter:?}</code><br>Categories covered: <code>{count_all}/{total}</code><br>Verdict: <code>{result:?}</code><br><code>{config:?}</code>"));
room.send(msg, None).await.expect("Couldn't send message");
Ok(())
Ok((JudgementResult::Ok, None))
}
pub async fn alert(
config: &RoomConfig,
room: &Joined,
event: &OriginalRoomMessageEvent,
result: JudgementResult,
result: (JudgementResult, Option<(String, KeywordSection)>),
is_reply: bool,
) -> anyhow::Result<()> {
if config.no_reply {
@ -129,29 +65,31 @@ impl Judgement {
// Determine which message to send
let section = if is_reply {
responses["reply"].as_object().unwrap()
responses["reply"].to_owned()
} else {
responses["message"].as_object().unwrap()
responses["message"].to_owned()
};
let response_type = section.get(result.to_json_var()).unwrap();
let response_type = section.get(result.0.to_json_var()).unwrap();
if response_type.is_null() {
anyhow::bail!("Called alert with result that has no detection message");
}
let response_type = response_type.as_object().unwrap();
let plain = response_type["plain"].as_str().unwrap();
let html = response_type["html"].as_str().unwrap();
let scam_type = result.1.unwrap().0;
let mut args: HashMap<String, &str> = HashMap::new();
args.insert("scam".to_string(), &scam_type);
let plain = strfmt(response_type["plain"].as_str().unwrap(), &args).unwrap();
let html = strfmt(response_type["html"].as_str().unwrap(), &args).unwrap();
// Send message
let msg = RoomMessageEventContent::text_html(plain, html);
if config.reply_to_scam {
let reply = msg.make_reply_to(event);
room.send(reply, None).await.expect("Couldn't send message");
let msg = if config.reply_to_scam {
msg.make_reply_to(event)
} else {
msg
};
room.send(msg, None).await.expect("Couldn't send message");
}
// Send reaction
if !is_reply {
@ -163,8 +101,7 @@ impl Judgement {
"key": "🚨🚨 SCAM 🚨🚨"
}}),
"m.reaction",
None,
)
None)
.await
.expect("Couldn't send reaction");
}

View File

@ -1,87 +1,53 @@
use serde_json::Value;
use std::collections::HashMap;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KeywordCategory {
Verb,
Currency,
Social,
}
impl std::fmt::Display for KeywordCategory {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use KeywordCategory::*;
match self {
Verb => write!(f, "Verb"),
Currency => write!(f, "Currency"),
Social => write!(f, "Social"),
}
}
/// One named detection section, built by `KeywordSection::load` from an entry
/// of the `"sections"` object in the keywords JSON.
#[derive(Debug, Clone)]
pub struct KeywordSection {
/// Hit count that callers compare against (`hits >= threshold`); read from `"threshold"`.
pub threshold: u64,
/// Keywords read from `"requiredKeywords"`; each match counts as a hit and
/// also sets the `hit_required` flag returned by `find`.
pub required: Vec<String>,
/// Keywords read from `"keywords"`; each match counts as a hit only.
pub keywords: Vec<String>,
}
impl KeywordCategory {
pub fn to_json_var(&self) -> &str {
use KeywordCategory::*;
match self {
Verb => "verbs",
Currency => "currencies",
Social => "socials",
}
}
pub fn from_json_var(var: &str) -> Result<Self, ()> {
use KeywordCategory::*;
match var {
"verbs" => Ok(Verb),
"currencies" => Ok(Currency),
"socials" => Ok(Social),
_ => Err(()),
}
}
pub fn create_counter_map() -> HashMap<KeywordCategory, u64> {
use KeywordCategory::*;
let mut map: HashMap<KeywordCategory, u64> = HashMap::new();
map.insert(Verb, 0);
map.insert(Currency, 0);
map.insert(Social, 0);
map
}
}
pub struct Keywords {
pub category: KeywordCategory,
pub words: Vec<Value>,
}
impl Keywords {
pub fn create(name: &str, v: &Value) -> Self {
let v = v.as_array().unwrap();
let Ok(category) = KeywordCategory::from_json_var(name) else {
panic!("Couldn't translate \"{name}\" to KeywordCategory");
};
impl KeywordSection {
pub fn load(json: &Value) -> Self {
let threshold: u64 = json["threshold"].as_u64().unwrap();
let required: Vec<String> = json["requiredKeywords"]
.as_array()
.unwrap()
.into_iter()
.map(|a| a.as_str().unwrap().to_string())
.collect();
let keywords: Vec<String> = json["keywords"]
.as_array()
.unwrap()
.into_iter()
.map(|a| a.as_str().unwrap().to_string())
.collect();
Self {
category,
words: v.to_vec(),
threshold,
required,
keywords,
}
}
pub fn find(&self, hay: &str) -> u64 {
pub fn find(&self, s: String) -> (u64, bool) {
let mut hits: u64 = 0;
let mut hit_required: bool = false;
for kw in self.words.to_owned().into_iter() {
let kw = kw.as_str().unwrap();
if hay.contains(kw) {
hits += 1
for kw in self.keywords.to_owned() {
if s.contains(&kw) {
hits += 1;
}
}
hits
for rkw in self.required.to_owned() {
if s.contains(&rkw) {
hits += 1;
hit_required = true;
}
}
(hits, hit_required)
}
}

View File

@ -15,6 +15,8 @@ pub mod config;
pub mod judge;
pub mod keywords;
pub mod matrix;
pub mod debug;
static DATA_DIR: Lazy<PathBuf> = Lazy::new(|| dirs::data_dir().expect("No data_dir found").join("scam_police"));
static SESSION_FILE: Lazy<PathBuf> = Lazy::new(|| DATA_DIR.join("session"));
@ -22,6 +24,7 @@ static ROOMS_CONFIG_FILE: Lazy<PathBuf> = Lazy::new(|| DATA_DIR.join("rooms_conf
static CONFIG: Lazy<config::Config> = Lazy::new(|| config::Config::load());
async fn on_room_message(event: OriginalSyncRoomMessageEvent, room: Room) -> anyhow::Result<()> {
if let Room::Joined(room) = room {
let mut room_ctrl = config::RoomConfigController::restore().expect("Couldn't restore room configs");
@ -108,11 +111,12 @@ async fn on_room_message(event: OriginalSyncRoomMessageEvent, room: Room) -> any
let reply_judgement = judge::Judgement { text: content };
if debug {
reply_judgement.send_debug(&config, &room).await?;
debug::Debug::send_debug(&reply_judgement, &room).await?;
return Ok(());
}
match reply_judgement.judge(&config)? {
let judgement = reply_judgement.judge(&config)?;
match judgement.0 {
judge::JudgementResult::Ok => (),
judge::JudgementResult::MaybeScam => (),
judge::JudgementResult::LikelyScam => {
@ -120,7 +124,7 @@ async fn on_room_message(event: OriginalSyncRoomMessageEvent, room: Room) -> any
&config,
&room,
&orig_event,
judge::JudgementResult::LikelyScam,
judgement,
true,
)
.await?;
@ -130,7 +134,8 @@ async fn on_room_message(event: OriginalSyncRoomMessageEvent, room: Room) -> any
}
}
match judgement.judge(&config)? {
let judgement = judgement.judge(&config)?;
match judgement.0 {
judge::JudgementResult::Ok => return Ok(()),
judge::JudgementResult::MaybeScam => return Ok(()),
judge::JudgementResult::LikelyScam => {
@ -138,7 +143,7 @@ async fn on_room_message(event: OriginalSyncRoomMessageEvent, room: Room) -> any
&config,
&room,
&orig_event,
judge::JudgementResult::LikelyScam,
judgement,
false,
)
.await?;