From e7b2fad441fd6092354f47764d340e1223da290e Mon Sep 17 00:00:00 2001 From: alex Date: Sat, 10 Dec 2022 17:58:47 +0100 Subject: [PATCH] verifyEventLink: check cache first to allow more valid URLs previously, a request for an open graph image reported from, say, https://github.com/example/some.repo would result in a 404 not found. this was a false negative, because the "is link in event?" verification would fail: "some.repo" looks like a file with a .repo extension, not an html page. with this commit, if a client has already requested example/some.repo before, and its link metadata is cached, the request for the OGP image succeeds. --- noxy.go | 52 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/noxy.go b/noxy.go index f2d321e..bbc5269 100644 --- a/noxy.go +++ b/noxy.go @@ -283,25 +283,28 @@ func (x *Noxer) verifyEventLink(ctx context.Context, eventID, relayURL, link str // link not found in the event text/json. // check URLs in OGP metadata for each suitable link found in the event. - for _, urlStr := range eventURLs { - u, err := url.Parse(urlStr) - if err != nil { - continue // invalid url - } - if ext := path.Ext(u.Path); ext != "" { - if !strings.HasSuffix(ext, "html") && !strings.HasSuffix(ext, "htm") { - continue // assume not an html page + for _, urlInEvent := range eventURLs { + // try only cache first. a client may have already requested /meta + // with this URL. if so, need no further parsing and network roundtrips. + var cachedMeta LinkMeta + if x.Cache.GetJSON(ctx, MakeCacheKey(urlInEvent, CacheKeyURLPreview), &cachedMeta) == nil { + if nonSortedSliceContains(cachedMeta.ImageURLs, link) { + return nil // ok; found } + continue // move on to the next url in the event + } + + // cached failed or miss; possibly fetch from remote and parse. 
+ if !looksLikeHTMLPage(urlInEvent) { + continue } - meta, err := x.slurpLinkMeta(ctx, urlStr) + meta, err := x.slurpLinkMeta(ctx, urlInEvent) if err != nil { - log.Printf("verifyEventLink slurpLinkMeta(%s): %v", u, err) + log.Printf("verifyEventLink slurpLinkMeta(%s): %v", urlInEvent, err) continue } - for _, imgURL := range meta.ImageURLs { - if imgURL == link { - return nil - } + if nonSortedSliceContains(meta.ImageURLs, link) { + return nil // ok; found } } return ErrNotFound @@ -591,3 +594,24 @@ func validURL(urlStr string) bool { } return u.Scheme == "" || u.Scheme == "http" || u.Scheme == "https" } + +// nonSortedSliceContains reports whether elem is present in a, using an O(N) linear scan. +func nonSortedSliceContains(a []string, elem string) bool { + for _, v := range a { + if v == elem { + return true + } + } + return false +} + +// looksLikeHTMLPage reports whether urlStr looks like a URL of an html webpage: its path has no extension, or an extension ending in "html" or "htm". a urlStr which fails to parse is reported as false. +func looksLikeHTMLPage(urlStr string) bool { + u, err := url.Parse(urlStr) + if err != nil { + return false + } + ext := path.Ext(u.Path) + // any .xxxhtml or .xxxhtm is ok, as is no extension at all + return ext == "" || strings.HasSuffix(ext, "html") || strings.HasSuffix(ext, "htm") +}