use reqwest::Client; use soup::prelude::*; use regex::Regex; pub async fn random_words(quantity: i8) -> Vec { let client = Client::new(); let res = client.post("https://randommer.io/word-generator") .body(format!("quantity={quantity}&wordType=0")) .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0") .header("Accept-Language", "en-US") .header("Content-Type","application/x-www-form-urlencoded") .send() .await .unwrap(); serde_json::from_str(res.text().await.unwrap().as_str()).unwrap() } pub async fn ddg_scrape(query: String) -> Vec { let client = Client::new(); let res = client.get(format!("https://duckduckgo.com/html?q={query}")) .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0") .header("Accept-Language", "en-US") .send() .await .unwrap(); let html_text = res.text().await.unwrap(); let soup = Soup::new(&html_text); soup.tag("a").class("result__url").find_all().map(|a| a.get("href").unwrap()).collect() } pub async fn get_content(link: String) -> String { let client = Client::new(); let res = client.get(&link) .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0") .header("Accept-Language", "en-US") .send() .await; let res = match res { Ok(r) => r, Err(e) => { eprintln!("get_content: {e:?}"); return String::new(); } }; let html_text = res.text().await.unwrap(); let soup = Soup::new(&html_text); let mut p: Vec = soup.tag("p").find_all().map(|a| a.text()).collect(); let mut span: Vec = soup.tag("span").find_all().map(|a| a.text()).collect(); p.append(&mut span); let mut texts: Vec = vec![]; let whitespace_re = Regex::new("[\n\r\t]").unwrap(); let tag_re = Regex::new("(<)(/)?[\\w](>)").unwrap(); for e in p { let untagged = tag_re.replace_all(&e, "").to_string(); let normalwhite = whitespace_re.replace_all(&untagged, "\n").to_string(); texts.push(normalwhite); } let mut joined = texts.join(" "); joined = format!("From: {link}\n\n{joined}"); unsafe { joined.slice_unchecked(0, std::cmp::min(8192, joined.len())).to_string() } }