70 lines
2.4 KiB
Rust
70 lines
2.4 KiB
Rust
use reqwest::Client;
|
|
use soup::prelude::*;
|
|
use regex::Regex;
|
|
|
|
pub async fn random_words(quantity: i8) -> Vec<String> {
|
|
let client = Client::new();
|
|
|
|
let res = client.post("https://randommer.io/word-generator")
|
|
.body(format!("quantity={quantity}&wordType=0"))
|
|
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0")
|
|
.header("Accept-Language", "en-US")
|
|
.header("Content-Type","application/x-www-form-urlencoded")
|
|
.send()
|
|
.await
|
|
.unwrap();
|
|
|
|
serde_json::from_str(res.text().await.unwrap().as_str()).unwrap()
|
|
}
|
|
|
|
pub async fn ddg_scrape(query: String) -> Vec<String> {
|
|
let client = Client::new();
|
|
|
|
let res = client.get(format!("https://duckduckgo.com/html?q={query}"))
|
|
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0")
|
|
.header("Accept-Language", "en-US")
|
|
.send()
|
|
.await
|
|
.unwrap();
|
|
|
|
let html_text = res.text().await.unwrap();
|
|
let soup = Soup::new(&html_text);
|
|
|
|
soup.tag("a").class("result__url").find_all().map(|a| a.get("href").unwrap()).collect()
|
|
}
|
|
|
|
pub async fn get_content(link: String) -> String {
|
|
let client = Client::new();
|
|
|
|
let res = client.get(&link)
|
|
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0")
|
|
.header("Accept-Language", "en-US")
|
|
.send()
|
|
.await;
|
|
|
|
let res = match res {
|
|
Ok(r) => r,
|
|
Err(e) => { eprintln!("get_content: {e:?}"); return String::new(); }
|
|
};
|
|
|
|
let html_text = res.text().await.unwrap();
|
|
let soup = Soup::new(&html_text);
|
|
|
|
let mut p: Vec<String> = soup.tag("p").find_all().map(|a| a.text()).collect();
|
|
let mut span: Vec<String> = soup.tag("span").find_all().map(|a| a.text()).collect();
|
|
p.append(&mut span);
|
|
|
|
let mut texts: Vec<String> = vec![];
|
|
|
|
let whitespace_re = Regex::new("[\n\r\t]").unwrap();
|
|
let tag_re = Regex::new("(<)(/)?[\\w](>)").unwrap();
|
|
for e in p {
|
|
let untagged = tag_re.replace_all(&e, "").to_string();
|
|
let normalwhite = whitespace_re.replace_all(&untagged, "\n").to_string();
|
|
texts.push(normalwhite);
|
|
}
|
|
|
|
let mut joined = texts.join(" ");
|
|
joined = format!("From: {link}\n\n{joined}");
|
|
unsafe { joined.slice_unchecked(0, std::cmp::min(8192, joined.len())).to_string() }
|
|
} |