56 lines
1.3 KiB
Go
56 lines
1.3 KiB
Go
package utils
|
|
|
|
import (
|
|
"golang.org/x/net/html"
|
|
"mime"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"net/url"
|
|
"slices"
|
|
)
|
|
|
|
func FetchTags(backend http.Handler, uri *url.URL, kinds ...string) (result []html.Node) {
|
|
writer := httptest.NewRecorder()
|
|
backend.ServeHTTP(writer, &http.Request{
|
|
Method: http.MethodGet,
|
|
URL: uri,
|
|
Header: http.Header{
|
|
"User-Agent": []string{"Mozilla 5.0 (compatible; go-away/1.0 fetch-tags) TwitterBot/1.0"},
|
|
"Accept": []string{"text/html,application/xhtml+xml"},
|
|
},
|
|
Close: true,
|
|
})
|
|
response := writer.Result()
|
|
if response == nil {
|
|
return nil
|
|
}
|
|
defer response.Body.Close()
|
|
if response.StatusCode != http.StatusOK {
|
|
return nil
|
|
}
|
|
|
|
if contentType, _, _ := mime.ParseMediaType(response.Header.Get("Content-Type")); contentType != "text/html" && contentType != "application/xhtml+xml" {
|
|
return nil
|
|
}
|
|
|
|
//TODO: handle non UTF-8 documents
|
|
node, err := html.ParseWithOptions(response.Body, html.ParseOptionEnableScripting(false))
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
for n := range node.Descendants() {
|
|
if n.Type == html.ElementNode && slices.Contains(kinds, n.Data) {
|
|
result = append(result, html.Node{
|
|
Type: n.Type,
|
|
DataAtom: n.DataAtom,
|
|
Data: n.Data,
|
|
Namespace: n.Namespace,
|
|
Attr: n.Attr,
|
|
})
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|