verifyEventLink: check cache first to allow more valid URLs

previously, a request for an open graph image reported from, say,
https://github.com/example/some.repo would result in 404 not found, a
false negative because the "is link in event?" verification would fail:
"some.repo" looks like a file with a .repo extension, not an html page.

in this commit, if a client has already requested example/some.repo
and the result is cached, the request for the OGP image succeeds.
master
alex 2 years ago
parent 78a1aae7e8
commit e7b2fad441
Signed by: x1ddos
GPG Key ID: FDEFB4A63CBD8460

@ -283,25 +283,28 @@ func (x *Noxer) verifyEventLink(ctx context.Context, eventID, relayURL, link str
// link not found in the event text/json.
// check URLs in OGP metadata for each suitable link found in the event.
for _, urlStr := range eventURLs {
u, err := url.Parse(urlStr)
if err != nil {
continue // invalid url
}
if ext := path.Ext(u.Path); ext != "" {
if !strings.HasSuffix(ext, "html") && !strings.HasSuffix(ext, "htm") {
continue // assume not an html page
for _, urlInEvent := range eventURLs {
// try only cache first. a client may have already requested /meta
// with this URL. if so, need no further parsing and network roundtrips.
var cachedMeta LinkMeta
if x.Cache.GetJSON(ctx, MakeCacheKey(urlInEvent, CacheKeyURLPreview), &cachedMeta) == nil {
if nonSortedSliceContains(cachedMeta.ImageURLs, link) {
return nil // ok; found
}
continue // move on to the next url in the event
}
// cached failed or miss; possibly fetch from remote and parse.
if !looksLikeHTMLPage(urlInEvent) {
continue
}
meta, err := x.slurpLinkMeta(ctx, urlStr)
meta, err := x.slurpLinkMeta(ctx, urlInEvent)
if err != nil {
log.Printf("verifyEventLink slurpLinkMeta(%s): %v", u, err)
log.Printf("verifyEventLink slurpLinkMeta(%s): %v", urlInEvent, err)
continue
}
for _, imgURL := range meta.ImageURLs {
if imgURL == link {
return nil
}
if nonSortedSliceContains(meta.ImageURLs, link) {
return nil // ok; found
}
}
return ErrNotFound
@ -591,3 +594,24 @@ func validURL(urlStr string) bool {
}
return u.Scheme == "" || u.Scheme == "http" || u.Scheme == "https"
}
// nonSortedSliceContains reports whether elem is present in a.
// It walks a from the start and stops at the first match, so a does not
// need to be sorted; the cost is linear in len(a).
func nonSortedSliceContains(a []string, elem string) bool {
	idx := 0
	// advance until either a match is hit or the slice is exhausted
	for idx < len(a) && a[idx] != elem {
		idx++
	}
	return idx < len(a)
}
// looksLikeHTMLPage reports whether urlStr looks like a URL of an html webpage.
//
// A URL qualifies when the final element of its path has no file extension,
// or when that extension ends in "htm" or "html" (.htm, .html, .xhtml, .shtml
// and so on). The extension is compared case-insensitively, so /INDEX.HTML
// qualifies as well. It returns false if urlStr cannot be parsed as a URL.
func looksLikeHTMLPage(urlStr string) bool {
	u, err := url.Parse(urlStr)
	if err != nil {
		return false
	}
	// path.Ext yields "" when the final path element contains no dot.
	// lowercase it so that upper- or mixed-case extensions are not rejected.
	ext := strings.ToLower(path.Ext(u.Path))
	if ext == "" {
		return true
	}
	// suffix match: any .xxxhtml or .xxxhtm is ok
	return strings.HasSuffix(ext, "html") || strings.HasSuffix(ext, "htm")
}

Loading…
Cancel
Save