matrix-lol/src/http.rs

70 lines
2.4 KiB
Rust

use reqwest::Client;
use soup::prelude::*;
use regex::Regex;
/// Requests `quantity` random words from randommer.io's word generator.
///
/// Sends a form-encoded POST (`quantity=<n>&wordType=0`) and parses the
/// response body as a JSON array of strings.
///
/// This is a best-effort call: if the request fails, the body cannot be read,
/// or the body is not a JSON array of strings, the error is logged to stderr
/// and an empty vector is returned instead of panicking.
pub async fn random_words(quantity: i8) -> Vec<String> {
    let client = Client::new();
    let res = client
        .post("https://randommer.io/word-generator")
        .body(format!("quantity={quantity}&wordType=0"))
        .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0")
        .header("Accept-Language", "en-US")
        .header("Content-Type", "application/x-www-form-urlencoded")
        .send()
        .await;
    let res = match res {
        Ok(r) => r,
        Err(e) => {
            eprintln!("random_words: {e:?}");
            return Vec::new();
        }
    };
    let body = match res.text().await {
        Ok(b) => b,
        Err(e) => {
            eprintln!("random_words: {e:?}");
            return Vec::new();
        }
    };
    // The remote service may answer with an error page instead of JSON;
    // treat that as "no words" rather than crashing the caller.
    match serde_json::from_str::<Vec<String>>(&body) {
        Ok(words) => words,
        Err(e) => {
            eprintln!("random_words: {e:?}");
            Vec::new()
        }
    }
}
pub async fn ddg_scrape(query: String) -> Vec<String> {
let client = Client::new();
let res = client.get(format!("https://duckduckgo.com/html?q={query}"))
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0")
.header("Accept-Language", "en-US")
.send()
.await
.unwrap();
let html_text = res.text().await.unwrap();
let soup = Soup::new(&html_text);
soup.tag("a").class("result__url").find_all().map(|a| a.get("href").unwrap()).collect()
}
pub async fn get_content(link: String) -> String {
let client = Client::new();
let res = client.get(&link)
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0")
.header("Accept-Language", "en-US")
.send()
.await;
let res = match res {
Ok(r) => r,
Err(e) => { eprintln!("get_content: {e:?}"); return String::new(); }
};
let html_text = res.text().await.unwrap();
let soup = Soup::new(&html_text);
let mut p: Vec<String> = soup.tag("p").find_all().map(|a| a.text()).collect();
let mut span: Vec<String> = soup.tag("span").find_all().map(|a| a.text()).collect();
p.append(&mut span);
let mut texts: Vec<String> = vec![];
let whitespace_re = Regex::new("[\n\r\t]").unwrap();
let tag_re = Regex::new("(<)(/)?[\\w](>)").unwrap();
for e in p {
let untagged = tag_re.replace_all(&e, "").to_string();
let normalwhite = whitespace_re.replace_all(&untagged, "\n").to_string();
texts.push(normalwhite);
}
let mut joined = texts.join(" ");
joined = format!("From: {link}\n\n{joined}");
unsafe { joined.slice_unchecked(0, std::cmp::min(8192, joined.len())).to_string() }
}