verifyEventLink: check cache first to allow more valid URLs

previously, a request for an open graph image reported from, say,
https://github.com/example/some.repo would result in 404 not found, a
false negative because the "is link in event?" verification would fail:
"some.repo" looks like a file with a .repo extension, not an html page.

with this commit, if a client has already requested example/some.repo
before, and it is cached, the request for the OGP image succeeds.
master
alex 2 years ago
parent 78a1aae7e8
commit e7b2fad441
Signed by: x1ddos
GPG Key ID: FDEFB4A63CBD8460

@ -283,25 +283,28 @@ func (x *Noxer) verifyEventLink(ctx context.Context, eventID, relayURL, link str
// link not found in the event text/json. // link not found in the event text/json.
// check URLs in OGP metadata for each suitable link found in the event. // check URLs in OGP metadata for each suitable link found in the event.
for _, urlStr := range eventURLs { for _, urlInEvent := range eventURLs {
u, err := url.Parse(urlStr) // try only cache first. a client may have already requested /meta
if err != nil { // with this URL. if so, need no further parsing and network roundtrips.
continue // invalid url var cachedMeta LinkMeta
if x.Cache.GetJSON(ctx, MakeCacheKey(urlInEvent, CacheKeyURLPreview), &cachedMeta) == nil {
if nonSortedSliceContains(cachedMeta.ImageURLs, link) {
return nil // ok; found
} }
if ext := path.Ext(u.Path); ext != "" { continue // move on to the next url in the event
if !strings.HasSuffix(ext, "html") && !strings.HasSuffix(ext, "htm") {
continue // assume not an html page
} }
// cache failure or miss; possibly fetch from remote and parse.
if !looksLikeHTMLPage(urlInEvent) {
continue
} }
meta, err := x.slurpLinkMeta(ctx, urlStr) meta, err := x.slurpLinkMeta(ctx, urlInEvent)
if err != nil { if err != nil {
log.Printf("verifyEventLink slurpLinkMeta(%s): %v", u, err) log.Printf("verifyEventLink slurpLinkMeta(%s): %v", urlInEvent, err)
continue continue
} }
for _, imgURL := range meta.ImageURLs { if nonSortedSliceContains(meta.ImageURLs, link) {
if imgURL == link { return nil // ok; found
return nil
}
} }
} }
return ErrNotFound return ErrNotFound
@ -591,3 +594,24 @@ func validURL(urlStr string) bool {
} }
return u.Scheme == "" || u.Scheme == "http" || u.Scheme == "https" return u.Scheme == "" || u.Scheme == "http" || u.Scheme == "https"
} }
// nonSortedSliceContains reports whether elem is present in a.
// a need not be sorted: items are compared one by one, front to back,
// so the lookup is linear in len(a). a nil or empty slice yields false.
func nonSortedSliceContains(a []string, elem string) bool {
	for i := 0; i < len(a); i++ {
		if a[i] == elem {
			return true
		}
	}
	return false
}
// looksLikeHTMLPage reports whether urlStr looks like a URL of an html webpage.
//
// the decision is based solely on the extension of the last URL path element:
//   - no extension at all is assumed to be an html page;
//   - an extension ending in "html" or "htm", compared case-insensitively,
//     is accepted, so .html, .htm, .xhtml, .shtml and .HTML all qualify;
//   - any other extension is assumed to be some non-html file.
//
// a urlStr which fails to parse is never considered an html page.
func looksLikeHTMLPage(urlStr string) bool {
	u, err := url.Parse(urlStr)
	if err != nil {
		return false
	}
	// normalize the case so that "INDEX.HTML" is treated the same way
	// as "index.html" instead of being rejected as a non-html file.
	ext := strings.ToLower(path.Ext(u.Path))
	if ext == "" {
		return true
	}
	// any .xxxhtml or .xxxhtm is ok
	return strings.HasSuffix(ext, "html") || strings.HasSuffix(ext, "htm")
}

Loading…
Cancel
Save