From 813d0501bdd10b50a7c1a24fb82594c8baaa42ca Mon Sep 17 00:00:00 2001
From: alex
Date: Mon, 5 Dec 2022 23:39:37 +0100
Subject: [PATCH] basic implementation of the latest api version

the meat is in the Noxer struct, in noxy.go. the executable server entry
point is in cmd/noxy/main.go.

all responses are cached using rudimentary filesystem-based caching.
a max storage quota is not implemented yet.
---
 README.md        |  31 ++-
 cache.go         | 217 +++++++++++++++++
 cmd/noxy/main.go | 206 ++++++++++++++++
 go.mod           |  21 ++
 go.sum           |  38 +++
 io.go            |  53 +++++
 io_test.go       |  37 +++
 noxy.go          | 593 +++++++++++++++++++++++++++++++++++++++++++++++
 noxy_test.go     | 414 +++++++++++++++++++++++++++++++++
 tools/release.sh |   6 +
 10 files changed, 1615 insertions(+), 1 deletion(-)
 create mode 100644 cache.go
 create mode 100644 cmd/noxy/main.go
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 io.go
 create mode 100644 io_test.go
 create mode 100644 noxy.go
 create mode 100644 noxy_test.go
 create mode 100755 tools/release.sh

diff --git a/README.md b/README.md
index 98277a1..0d0e4e9 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ and `og:description`, respectively.
 future versions may also employ [oEmbed](https://oembed.com/) and other
 metadata formats parsing.
 
 the web page at `url` must be served with `text/html` content type. if the page
-contains no or empty OGP metadata, noxy response with 200 OK and blank fields.
+contains no or empty OGP metadata, noxy responds with 200 OK and blank fields.
 otherwise, the response is a 4xx status code.
 
 ### /data
@@ -123,6 +123,38 @@ now, fetch the preview image. note the change from `/meta` to `/data` endpoint:
 
 [event raw data →](https://nostr.com/e/af30dac1d800acc25b87d0d6d0dd33bddf49e7f356556540a6c7722e3cb363fe)
 
+## development
+
+the binary's entry point is [cmd/noxy/main.go](cmd/noxy/main.go).
+it imports packages in the root of the repo. the actual proxy is implemented
+by `Noxer` in [noxy.go](noxy.go).
+running the server locally:
+
+    mkdir cache
+    go run ./cmd/noxy -cachedir $PWD/cache
+
+before sending a patch, make sure the code passes tests:
+
+    go test -race
+
+files are formatted:
+
+    go fmt ./...
+
+and the go module file is updated:
+
+    go mod tidy
+
+## release
+
+a release binary is built using the following script.
+it produces a `noxy` executable in the root of the repo.
+
+    ./tools/release.sh
+
+before making a new release, you'll probably want to create a new git tag.
+the tag is used as the noxy version, which is also printed when the `-V` flag
+is specified.
diff --git a/cache.go b/cache.go
new file mode 100644
index 0000000..86aee21
--- /dev/null
+++ b/cache.go
@@ -0,0 +1,217 @@
+package noxy
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"io"
+	"mime"
+	"os"
+	"path/filepath"
+)
+
+var ErrCacheMiss = errors.New("cache miss")
+
+// DataStream is an io.Reader augmented with a mime type.
+type DataStream struct {
+	ContentType string
+	r           io.Reader
+}
+
+// MimeType reports the mime type as parsed from ds.ContentType, ignoring
+// any errors resulting from parsing optional media parameters.
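+//
+// a hypothetical usage sketch:
+//
+//	ds := DataStream{ContentType: "text/html; charset=utf-8"}
+//	ds.MimeType() // "text/html": optional parameters are dropped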
+func (ds DataStream) MimeType() string {
+	mtype, _, err := mime.ParseMediaType(ds.ContentType)
+	if err != nil && !errors.Is(err, mime.ErrInvalidMediaParameter) {
+		// unparsable content type; report it as is.
+		return ds.ContentType
+	}
+	// mime.ParseMediaType returns a valid media type even when it reports
+	// ErrInvalidMediaParameter, so that error is safe to ignore here.
+	return mtype
+}
+
+func (ds DataStream) Read(p []byte) (n int, err error) {
+	return ds.r.Read(p)
+}
+
+func (ds DataStream) Close() error {
+	if closer, ok := ds.r.(io.Closer); ok {
+		return closer.Close()
+	}
+	return nil
+}
+
+// Cacher is used by Noxer to store and use meta info in JSON format, and stream files.
+type Cacher interface {
+	GetJSON(ctx context.Context, key CacheKey, dst any) error
+	PutJSON(ctx context.Context, key CacheKey, v any) error
+	GetStream(ctx context.Context, key CacheKey) (*DataStream, error)
+	PutStream(ctx context.Context, key CacheKey, mimeType string, r io.Reader) error
+}
+
+// CacheKeyType allows CacheKey to segregate data based on their logical types.
+type CacheKeyType byte
+
+const (
+	_ CacheKeyType = 1 << iota
+	CacheKeyEvent      // nostr event links
+	CacheKeyURLPreview // OG link preview metadata
+	CacheKeyData       // actual url data
+)
+
+// CacheKey is 33 bytes long, encoding its CacheKeyType at index 32.
+type CacheKey []byte
+
+// MakeCacheKey creates a new cache key based on sha256 of s and the logical data type.
+func MakeCacheKey(s string, typ CacheKeyType) CacheKey {
+	h := sha256.Sum256([]byte(s))
+	return append(h[:], byte(typ))
+}
+
+// String returns a key representation without its CacheKeyType.
+func (k CacheKey) String() string {
+	return hex.EncodeToString(k[:len(k)-1])
+}
+
+// Path result is suitable as a filesystem path for storing a cache entry.
+func (k CacheKey) Path() string {
+	h := hex.EncodeToString(k[:len(k)-1])
+	return filepath.Join(k.Namespace(), h[0:4], h)
+}
+
+// Namespace is a string representation of the key's CacheKeyType.
+func (k CacheKey) Namespace() string {
+	typ := byte(0)
+	if n := len(k); n > 0 {
+		typ = k[n-1]
+	}
+	switch CacheKeyType(typ) {
+	default:
+		return ""
+	case CacheKeyEvent:
+		return "event"
+	case CacheKeyURLPreview:
+		return "preview"
+	case CacheKeyData:
+		return "data"
+	}
+}
+
+// DirCache implements Cacher using regular filesystem operations.
+// it places all data under subdirectories of the Root.
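+//
+// a sketch of the resulting on-disk layout for a key in the "data"
+// namespace (<hash> is the hex sha256 of the cached URL, <hhhh> its
+// first four digits):
+//
+//	<Root>/data/<hhhh>/<hash>            the cached bytes
+//	<Root>/data/<hhhh>/<hash>.meta.json  the content type metadata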
+//
+// TODO: cap at max storage size
+// TODO: max size per key
+type DirCache struct {
+	Root string
+}
+
+const dirCacheMetaSuffix = ".meta.json"
+
+func (d DirCache) makeFilepath(key CacheKey, mkdir bool) string {
+	p := filepath.Join(d.Root, key.Path())
+	if mkdir {
+		os.MkdirAll(filepath.Dir(p), 0700)
+	}
+	return p
+}
+
+func (d DirCache) makeTemp(key CacheKey) string {
+	p := filepath.Join(d.Root, "tmp", key.Path())
+	os.MkdirAll(filepath.Dir(p), 0700)
+	return p
+}
+
+func (d DirCache) GetJSON(ctx context.Context, key CacheKey, dst any) error {
+	b, err := os.ReadFile(d.makeFilepath(key, false))
+	switch {
+	case err != nil && errors.Is(err, os.ErrNotExist):
+		return ErrCacheMiss
+	case err != nil:
+		return err
+	}
+	return json.Unmarshal(b, dst)
+}
+
+func (d DirCache) PutJSON(ctx context.Context, key CacheKey, v any) error {
+	b, err := json.Marshal(v)
+	if err != nil {
+		return err
+	}
+	return os.WriteFile(d.makeFilepath(key, true), b, 0600)
+}
+
+func (d DirCache) GetStream(ctx context.Context, key CacheKey) (*DataStream, error) {
+	filepath := d.makeFilepath(key, false)
+	f, err := os.Open(filepath)
+	switch {
+	case err != nil && errors.Is(err, os.ErrNotExist):
+		return nil, ErrCacheMiss
+	case err != nil:
+		return nil, err
+	}
+	mb, err := os.ReadFile(filepath + dirCacheMetaSuffix)
+	if err != nil {
+		f.Close()
+		return nil, err
+	}
+	var ds DataStream
+	if err := json.Unmarshal(mb, &ds); err != nil {
+		f.Close()
+		return nil, err
+	}
+	ds.r = f
+	return &ds, nil
+}
+
+func (d DirCache) PutStream(ctx context.Context, key CacheKey, mimeType string, r io.Reader) error {
+	ds := DataStream{ContentType: mimeType}
+	mb, err := json.Marshal(ds)
+	if err != nil {
+		return err
+	}
+	tmpfile := d.makeTemp(key)
+	tmpmeta := tmpfile + dirCacheMetaSuffix
+	if err := os.WriteFile(tmpmeta, mb, 0600); err != nil {
+		return err
+	}
+
+	f, err := os.OpenFile(tmpfile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
+	if err != nil {
+		return err
+	}
+	if _, err := io.Copy(f, r); err != nil {
+		f.Close()
+		return err
+	}
+	if err := f.Close(); err != nil {
+		return err
+	}
+
+	filepath := d.makeFilepath(key, true)
+	if err := os.Rename(tmpfile, filepath); err != nil {
+		return err
+	}
+	return os.Rename(tmpmeta, filepath+dirCacheMetaSuffix)
+}
+
+// NullCache stores no data.
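+// it is handy as a Cacher stub in tests: reads always report ErrCacheMiss,
+// PutJSON drops its value, and PutStream drains r without storing anything.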
+var NullCache nullCache
+
+type nullCache struct{}
+
+func (nc nullCache) GetJSON(context.Context, CacheKey, any) error {
+	return ErrCacheMiss
+}
+func (nc nullCache) PutJSON(context.Context, CacheKey, any) error {
+	return nil
+}
+
+func (nc nullCache) GetStream(context.Context, CacheKey) (*DataStream, error) {
+	return nil, ErrCacheMiss
+}
+func (nc nullCache) PutStream(ctx context.Context, k CacheKey, mtype string, r io.Reader) error {
+	_, err := io.Copy(io.Discard, r)
+	return err
+}
diff --git a/cmd/noxy/main.go b/cmd/noxy/main.go
new file mode 100644
index 0000000..bd9c4fb
--- /dev/null
+++ b/cmd/noxy/main.go
@@ -0,0 +1,206 @@
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+
+	"git.qcode.ch/nostr/noxy"
+)
+
+var noxyVersion = "dev" // overwritten by linker flags in release build
+
+var (
+	listenAddr       = flag.String("addr", "127.0.0.1:8889", "listen address")
+	cacheDir         = flag.String("cachedir", "/tmp", "absolute cache dir")
+	maxFileSize      = flag.Int64("maxfilesize", 1<<23, "refuse to handle files larger than this, in bytes")
+	idleRelayTimeout = flag.Duration("idlerelaytimeout", 10*time.Minute, "remove relay connections after idling this long")
+	showVersion      = flag.Bool("V", false, "print version and exit")
+
+	// the -relay flag, populated by parseRelayFlag.
+	// set to defaultKnownRelays if empty.
+	knownRelays        []string
+	defaultKnownRelays = []string{
+		"nostr.x1ddos.ch",
+		// from https://nostr.info/relays/
+		"expensive-relay.fiatjaf.com",
+		"nostr-pub.semisol.dev",
+		"nostr-pub.wellorder.net",
+		"nostr-relay-dev.wlvs.space",
+		"nostr-relay.wlvs.space",
+		"nostr-verified.wellorder.net",
+		"nostr.bitcoiner.social",
+		"nostr.delo.software",
+		"nostr.ono.re",
+		"nostr.onsats.org",
+		"nostr.openchain.fr",
+		"nostr.oxtr.dev",
+		"nostr.rdfriedl.com",
+		"nostr.semisol.dev",
+		"nostr.zaprite.io",
+		"relay.cynsar.foundation",
+		"relay.damus.io",
+		"relay.farscapian.com",
+		"relay.minds.com",
+		"relay.nostr.info",
+		"relay.oldcity-bitcoiners.info",
+		"relay.sovereign-stack.org",
+	}
+)
+
+func init() {
+	flag.Func("relay", "a comma-separated list of nostr relays noxy is allowed to connect to", parseRelayFlag)
+	sort.Strings(defaultKnownRelays)
+}
+
+func parseRelayFlag(v string) error {
+	for _, s := range strings.FieldsFunc(v, func(r rune) bool { return r == ',' }) {
+		s = strings.TrimSpace(s)
+		if s == "" {
+			continue
+		}
+		u, err := url.Parse(s)
+		if err != nil {
+			return fmt.Errorf("invalid relay URL %s: %w", s, err)
+		}
+		host := u.Hostname()
+		if host == "" {
+			return fmt.Errorf("invalid relay URL %s: no hostname", s)
+		}
+		knownRelays = append(knownRelays, host)
+	}
+	return nil
+}
+
+func usage() {
+	w := flag.CommandLine.Output()
+	fmt.Fprintf(w, "usage of %s:\n", os.Args[0])
+	flag.PrintDefaults()
+	fmt.Fprintln(w, "\nthe -relay flag may be specified multiple times.")
+	fmt.Fprintf(w, "its default value is the following list:\n\n")
+	fmt.Fprintln(w, strings.Join(defaultKnownRelays, "\n"))
+}
+
+// set up in main and used by handleXxx HTTP server handlers.
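+// a single shared Noxer instance is deliberate: it guards its internal
+// relay connections and in-flight fetches with mutexes, so concurrent
+// handler invocations can use it without extra locking.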
+var noxer *noxy.Noxer
+
+func main() {
+	flag.Usage = usage
+	flag.Parse()
+	if *showVersion {
+		fmt.Println(noxyVersion)
+		return
+	}
+	if len(knownRelays) == 0 {
+		knownRelays = defaultKnownRelays
+	}
+	sort.Strings(knownRelays)
+	if !filepath.IsAbs(*cacheDir) {
+		log.Fatal("cache dir must be absolute path")
+	}
+
+	noxer = &noxy.Noxer{
+		MaxFileSize:      *maxFileSize,
+		IdleRelayTimeout: *idleRelayTimeout,
+		KnownRelays:      knownRelays,
+		Cache:            noxy.DirCache{Root: *cacheDir},
+		HTTPClient: &http.Client{Transport: &http.Transport{
+			MaxIdleConns:          100,
+			MaxConnsPerHost:       2,
+			IdleConnTimeout:       90 * time.Second,
+			TLSHandshakeTimeout:   10 * time.Second,
+			ExpectContinueTimeout: 1 * time.Second,
+		}},
+	}
+
+	mux := http.NewServeMux()
+	mux.Handle("/", http.HandlerFunc(handleRoot))
+	mux.Handle("/meta", http.HandlerFunc(handleMeta))
+	mux.Handle("/data", http.HandlerFunc(handleData))
+
+	log.Printf("listening on %s", *listenAddr)
+	log.Printf("known relays: %s", strings.Join(knownRelays, ", "))
+	// ListenAndServe always returns a non-nil error; report it on exit.
+	log.Fatal(http.ListenAndServe(*listenAddr, logHandler(mux)))
+}
+
+// handles requests to /
+func handleRoot(w http.ResponseWriter, r *http.Request) {
+	fmt.Fprintf(w, "this is noxy version %s\nhttps://git.qcode.ch/nostr/noxy\n", noxyVersion)
+}
+
+// handles requests to /meta
+func handleMeta(w http.ResponseWriter, r *http.Request) {
+	if r.Method != "GET" {
+		w.WriteHeader(http.StatusMethodNotAllowed)
+		return
+	}
+	eventID := r.FormValue("id")
+	relayURL := r.FormValue("relay")
+	linkURL := r.FormValue("url")
+	meta, err := noxer.FetchLinkMeta(r.Context(), eventID, relayURL, linkURL)
+	if err != nil {
+		writeError(w, err)
+		return
+	}
+	res := struct {
+		Type   string   `json:"type"`
+		Title  string   `json:"title"`
+		Descr  string   `json:"descr"`
+		Images []string `json:"images"`
+	}{
+		Type:   meta.Type,
+		Title:  meta.Title,
+		Descr:  meta.Description,
+		Images: meta.ImageURLs,
+	}
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(res)
+}
+
+// handles requests to /data
+func handleData(w http.ResponseWriter, r *http.Request) {
+	if r.Method != "GET" {
+		w.WriteHeader(http.StatusMethodNotAllowed)
+		return
+	}
+	eventID := r.FormValue("id")
+	relayURL := r.FormValue("relay")
+	linkURL := r.FormValue("url")
+	ds, err := noxer.StreamLinkData(r.Context(), eventID, relayURL, linkURL)
+	if err != nil {
+		writeError(w, err)
+		return
+	}
+	defer ds.Close()
+	w.Header().Set("Content-Type", ds.ContentType)
+	io.Copy(w, ds)
+}
+
+func writeError(w http.ResponseWriter, err error) {
+	log.Printf("ERROR: %v", err)
+	w.Header().Set("Content-Type", "text/plain")
+	switch {
+	default:
+		w.WriteHeader(http.StatusBadRequest)
+	case errors.Is(err, noxy.ErrNotFound):
+		w.WriteHeader(http.StatusNotFound)
+	}
+	fmt.Fprint(w, err.Error())
+}
+
+func logHandler(h http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		log.Printf("%s %s", r.Method, r.RequestURI)
+		h.ServeHTTP(w, r)
+	})
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..369a7a6
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,21 @@
+module git.qcode.ch/nostr/noxy
+
+go 1.18
+
+require (
+	github.com/dyatlov/go-opengraph/opengraph v0.0.0-20220524092352-606d7b1e5f8a
+	github.com/nbd-wtf/go-nostr v0.8.1
+	golang.org/x/net v0.0.0-20220520000938-2e3eb7b945c2
+	mvdan.cc/xurls/v2 v2.4.0
+)
+
+require (
+	github.com/SaveTheRbtz/generic-sync-map-go v0.0.0-20220414055132-a37292614db8 // indirect
+	github.com/btcsuite/btcd/btcec/v2 v2.2.0 // indirect
+	github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1 // indirect
+	github.com/decred/dcrd/crypto/blake256 v1.0.0 // indirect
+	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect
+	github.com/gorilla/websocket v1.4.2 // indirect
+	github.com/valyala/fastjson v1.6.3 // indirect
+	golang.org/x/exp v0.0.0-20221106115401-f9659909a136 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..5908c4b
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,38 @@
+github.com/SaveTheRbtz/generic-sync-map-go v0.0.0-20220414055132-a37292614db8 h1:Xa6tp8DPDhdV+k23uiTC/GrAYOe4IdyJVKtob4KW3GA=
+github.com/SaveTheRbtz/generic-sync-map-go v0.0.0-20220414055132-a37292614db8/go.mod h1:ihkm1viTbO/LOsgdGoFPBSvzqvx7ibvkMzYp3CgtHik=
+github.com/btcsuite/btcd/btcec/v2 v2.2.0 h1:fzn1qaOt32TuLjFlkzYSsBC35Q3KUjT1SwPxiMSCF5k=
+github.com/btcsuite/btcd/btcec/v2 v2.2.0/go.mod h1:U7MHm051Al6XmscBQ0BoNydpOTsFAn707034b5nY8zU=
+github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1 h1:q0rUy8C/TYNBQS1+CGKw68tLOFYSNEs0TFnxxnS9+4U=
+github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK09Y2A4Xv7EE0=
+github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc=
+github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc=
+github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs=
+github.com/dyatlov/go-opengraph/opengraph v0.0.0-20220524092352-606d7b1e5f8a h1:etIrTD8BQqzColk9nKRusM9um5+1q0iOEJLqfBMIK64=
+github.com/dyatlov/go-opengraph/opengraph v0.0.0-20220524092352-606d7b1e5f8a/go.mod h1:emQhSYTXqB0xxjLITTw4EaWZ+8IIQYw+kx9GqNUKdLg=
+github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
+github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/nbd-wtf/go-nostr v0.8.1 h1:DCbLiF1r3xHKBQA1Noz+97ra/B9AcftTh9w+syg3KzM=
+github.com/nbd-wtf/go-nostr v0.8.1/go.mod h1:IIT/16QZ/nzi5cgQFU2WJrezYPNRi0iNgiitYMiu8UQ=
+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
+github.com/rogpeppe/go-internal v1.8.1/go.mod h1:JeRgkft04UBgHMgCIwADu4Pn6Mtm5d4nPKWu0nJ5d+o=
+github.com/valyala/fastjson v1.6.3 h1:tAKFnnwmeMGPbwJ7IwxcTPCNr3uIzoIj3/Fh90ra4xc=
+github.com/valyala/fastjson v1.6.3/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
+golang.org/x/exp v0.0.0-20221106115401-f9659909a136 h1:Fq7F/w7MAa1KJ5bt2aJ62ihqp9HDcRuyILskkpIAurw=
+golang.org/x/exp v0.0.0-20221106115401-f9659909a136/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
+golang.org/x/net v0.0.0-20220520000938-2e3eb7b945c2 h1:NWy5+hlRbC7HK+PmcXVUmW1IMyFce7to56IUvhUFm7Y=
+golang.org/x/net v0.0.0-20220520000938-2e3eb7b945c2/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+mvdan.cc/xurls/v2 v2.4.0 h1:tzxjVAj+wSBmDcF6zBB7/myTy3gX9xvi8Tyr28AuQgc=
+mvdan.cc/xurls/v2 v2.4.0/go.mod h1:+GEjq9uNjqs8LQfM9nVnM8rff0OQ5Iash5rzX+N1CSg=
diff --git a/io.go b/io.go
new file mode 100644
index 0000000..ccd1dd5
--- /dev/null
+++ b/io.go
@@ -0,0 +1,53 @@
+package noxy
+
+import (
+	"bufio"
+	"errors"
+	"io"
+)
+
+var ErrSizeLimitExceeded = errors.New("size limit exceeded")
+
+// HardLimitReader reads from r up to max bytes.
+// it returns ErrSizeLimitExceeded once the number of read bytes equals
+// or exceeds max.
+func HardLimitReader(r io.Reader, max int64) io.Reader {
+	return &hardLimitReader{r, max}
+}
+
+type hardLimitReader struct {
+	r io.Reader
+	n int64 // remaining bytes
+}
+
+func (l *hardLimitReader) Read(p []byte) (n int, err error) {
+	if l.n <= 0 {
+		return 0, ErrSizeLimitExceeded
+	}
+	if int64(len(p)) > l.n {
+		p = p[0:l.n]
+	}
+	n, err = l.r.Read(p)
+	l.n -= int64(n)
+	return
+}
+
+// SinkTeeReader stops writing to w at the first encountered write error
+// but continues to propagate reads. the underlying writer is always buffered.
+func SinkTeeReader(r io.Reader, w io.Writer) io.Reader {
+	return &sinkTeeReader{r: r, w: bufio.NewWriter(w)}
+}
+
+type sinkTeeReader struct {
+	r    io.Reader
+	w    *bufio.Writer
+	werr error
+}
+
+func (st *sinkTeeReader) Read(p []byte) (n int, err error) {
+	n, err = st.r.Read(p)
+	if n > 0 && st.werr == nil {
+		_, st.werr = st.w.Write(p[:n])
+		st.werr = st.w.Flush()
+	}
+	return
+}
diff --git a/io_test.go b/io_test.go
new file mode 100644
index 0000000..50de827
--- /dev/null
+++ b/io_test.go
@@ -0,0 +1,37 @@
+package noxy
+
+import (
+	"bytes"
+	"testing"
+	"testing/iotest"
+)
+
+// ErrorWriter always returns Err on Write calls.
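+// it gives the SinkTeeReader tests a writer that fails deterministically.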
+type ErrorWriter struct {
+	Err error
+}
+
+func (ew ErrorWriter) Write([]byte) (int, error) {
+	return 0, ew.Err
+}
+
+func TestSinkTeeReader(t *testing.T) {
+	const text = "hello"
+	var w bytes.Buffer
+	tee := SinkTeeReader(bytes.NewBufferString(text), &w)
+
+	if err := iotest.TestReader(tee, []byte(text)); err != nil {
+		t.Errorf("tee reader: %v", err)
+	}
+	if v := string(w.Bytes()); v != text {
+		t.Errorf("w = %q; want %q", v, text)
+	}
+}
+
+func TestSinkTeeReaderErrWriter(t *testing.T) {
+	const text = "hello"
+	tee := SinkTeeReader(bytes.NewBufferString(text), ErrorWriter{iotest.ErrTimeout})
+	if err := iotest.TestReader(tee, []byte(text)); err != nil {
+		t.Errorf("tee reader: %v", err)
+	}
+}
diff --git a/noxy.go b/noxy.go
new file mode 100644
index 0000000..f2d321e
--- /dev/null
+++ b/noxy.go
@@ -0,0 +1,593 @@
+package noxy
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"path"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/dyatlov/go-opengraph/opengraph"
+	nostr "github.com/nbd-wtf/go-nostr"
+	xurls "mvdan.cc/xurls/v2"
+)
+
+var (
+	ErrNotFound             = errors.New("event or resource not found")
+	ErrUnsupportedEventKind = errors.New("unsupported event kind")
+	ErrUnsupportedMimeType  = errors.New("unsupported link mime type")
+	ErrUnsupportedRelay     = errors.New("unsupported relay")
+)
+
+// LinkMeta contains metadata info about a URL.
+// it is typically assembled from OGP (https://ogp.me) by Noxer.FetchLinkMeta.
+type LinkMeta struct {
+	Type        string   // og:type
+	Title       string   // og:title
+	Description string   // og:description
+	ImageURLs   []string // og:image:secure_url or og:image:url
+}
+
+// Noxer can proxy link preview info and data streams.
+// See FetchLinkMeta and StreamLinkData for details.
+//
+// while the only required field is Cache, a zero value of KnownRelays
+// makes Noxer refuse to proxy any URLs.
+type Noxer struct {
+	// Cache is used to store both link preview meta info and
+	// data streamed to clients. it must be non-nil for Noxer to be usable.
+	Cache Cacher
+
+	// Noxer refuses to work with web pages and data streams larger than this value.
+	MaxFileSize int64 // defaults to 1MiB
+	// how long to keep an open connection to a relay without any activity.
+	// an activity is any cache-miss call to FetchLinkMeta or StreamLinkData.
+	// connections to relays are used to verify whether a link is part of
+	// an event contents. see aforementioned methods for more details.
+	IdleRelayTimeout time.Duration // defaults to 1min
+	// Noxer connects only to those relays hostnames of which are specified here.
+	// in other words, slice elements are only hostname parts of relay URLs.
+	// KnownRelays must be sorted in ascending order.
+	KnownRelays []string
+
+	// HTTPClient is used to make HTTP connections when fetching link preview
+	// info and data streaming. when nil, http.DefaultClient is used.
+	HTTPClient *http.Client
+
+	// clients keeps track of nostr relay connections to clean them up
+	// and remove idle after IdleRelayTimeout.
+	clientsMu    sync.Mutex
+	clients      map[string]*relayClient
+	cleanupTimer *time.Timer
+
+	// slurpers keep track of ongoing HTTP requests, both link preview
+	// meta info and data streams.
+	slurpersMu sync.Mutex
+	slurpers   map[string]chan struct{}
+}
+
+// relayClient wraps nostr.Relay with an additional timestamp
+// indicating last use of the relay to keep track of all active relay
+// connections and remove idle.
+//
+// lastUsed is updated every time Noxer.fetchNostrEvent is called.
+type relayClient struct {
+	relay    *nostr.Relay
+	lastUsed time.Time
+}
+
+// FetchLinkMeta requests the web page at link URL, parses it as HTML and returns
+// metadata found in the contents. It refuses to parse remote responses with
+// content-type other than text/html.
+//
+// link URL must be found in content field of the nostr event posted to the
+// specified relay. FetchLinkMeta connects to the nostr relay at relayURL
+// and sends a filter'ed request with ids field set to eventID.
+// the received event contents are "grepped" for the value of link as is.
+//
+// relayURL's hostname must be an element of x.KnownRelays.
+// remote must respond with HTTP 200 OK to the link URL.
+//
+// successfully parsed link URLs are cached using Cacher.PutJSON. so, subsequent
+// calls should not hit the remote server again unless x.Cache fails.
+// concurrent requests are suspended until the context or first call is done.
+func (x *Noxer) FetchLinkMeta(ctx context.Context, eventID, relayURL, link string) (*LinkMeta, error) {
+	if err := x.verifyEventLink(ctx, eventID, relayURL, link, verifyNoMeta); err != nil {
+		return nil, fmt.Errorf("verifyEventLink: %w", err)
+	}
+	return x.slurpLinkMeta(ctx, link)
+}
+
+func (x *Noxer) slurpLinkMeta(ctx context.Context, link string) (*LinkMeta, error) {
+	// use cache here instead of directly in FetchLinkMeta to avoid
+	// hitting remotes in x.verifyEventLink as much as possible.
+	cacheKey := MakeCacheKey(link, CacheKeyURLPreview)
+	var meta LinkMeta
+	cacheErr := x.Cache.GetJSON(ctx, cacheKey, &meta)
+	if cacheErr == nil {
+		return &meta, nil
+	}
+
+	log.Printf("cache.getjson %s(%s): %v", link, cacheKey, cacheErr)
+	ds, err := x.detachedSlurpData(ctx, link)
+	if err != nil {
+		return nil, fmt.Errorf("detachedSlurpData: %w", err)
+	}
+	defer ds.Close()
+	if mtype := ds.MimeType(); mtype != "text/html" {
+		return nil, fmt.Errorf("%w: received %q, want text/html", ErrUnsupportedMimeType, mtype)
+	}
+	res, err := parseLinkMeta(ds)
+	if err != nil {
+		return nil, fmt.Errorf("parseLinkMeta: %w", err)
+	}
+	if err := x.Cache.PutJSON(ctx, cacheKey, res); err != nil {
+		log.Printf("cache.putjson %s(%s): %v", link, cacheKey, err)
+	}
+	return res, nil
+}
+
+// StreamLinkData opens an HTTP connection to link and streams the response back.
+// while doing so, it also caches the response bytes using Cache.PutStream. so,
+// subsequent calls should not hit the remote link again unless x.Cache fails.
+//
+// link URL must be found in "content" field of the nostr event posted to the
+// specified relay. StreamLinkData connects to the nostr relay at relayURL
+// and sends a filter'ed request with ids field set to eventID.
+// for event kinds 1 (text note) and 42 (channel message), the event contents
+// are simply "grepped" for the value of link as is.
+// for event kinds 0 (set metadata), 40 (create channel) and 41 (set channel
+// metadata) the link is checked against "picture" field.
+//
+// additionally, link URL may be one of LinkMeta.ImageURLs as returned by
+// x.FetchLinkMeta to a call with the same eventID.
+//
+// relayURL's hostname must be an element of x.KnownRelays.
+// remote must respond with HTTP 200 OK to the link URL.
+//
+// callers must close DataStream.
+// concurrent requests are suspended until the context or first call is done.
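+//
+// a hypothetical caller sketch (the id, relay and link values, and the
+// http.ResponseWriter w, are assumptions, not part of this API):
+//
+//	ds, err := noxer.StreamLinkData(ctx, id, relay, link)
+//	if err != nil {
+//		// handle the error
+//	}
+//	defer ds.Close()
+//	w.Header().Set("Content-Type", ds.ContentType)
+//	io.Copy(w, ds)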
+func (x *Noxer) StreamLinkData(ctx context.Context, eventID, relayURL, link string) (*DataStream, error) {
+	if err := x.verifyEventLink(ctx, eventID, relayURL, link, verifyExpandMeta); err != nil {
+		return nil, err
+	}
+	cacheKey := MakeCacheKey(link, CacheKeyData)
+	ds, err := x.Cache.GetStream(ctx, cacheKey)
+	if err != nil {
+		log.Printf("cache.getstream %s(%s): %v", link, cacheKey, err)
+		ds, err = x.detachedSlurpData(ctx, link)
+	}
+	return ds, err
+}
+
+// detachedSlurpData always finishes data streaming from remote url, even if
+// the returned DataStream is closed prematurely, to cache the bytes for subsequent calls.
+func (x *Noxer) detachedSlurpData(ctx context.Context, url string) (*DataStream, error) {
+	// check whether there's an ongoing stream. if so, wait and use cache or fail.
+	cacheKey := MakeCacheKey(url, CacheKeyData)
+	cacheKeyStr := cacheKey.Path()
+	x.slurpersMu.Lock()
+	slurpCh, found := x.slurpers[cacheKeyStr]
+	if found {
+		// a previous call is already streaming.
+		// wait 'till they're done, because the stream is non-seekable,
+		// then get it from cache or fail.
+		x.slurpersMu.Unlock()
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-slurpCh:
+			return x.Cache.GetStream(ctx, cacheKey)
+		}
+	} else {
+		// wouldn't need this branch if close(slurpCh) was done after x.slurpersMu.Lock()
+		// in the goroutine below.
+		// but it's so easy to miss in future code changes that i don't want to risk it:
+		// not a big deal to check the cache one more time.
+		// reconsider if performance here becomes a concern.
+		ds, err := x.Cache.GetStream(ctx, cacheKey)
+		if err == nil {
+			x.slurpersMu.Unlock()
+			return ds, nil
+		}
+	}
+
+	// no other goroutine is streaming; do it now and make others wait on slurpCh.
+	slurpCh = x.makeSlurperChan(cacheKeyStr)
+	x.slurpersMu.Unlock()
+	// if anything fails before the streaming goroutine below takes over,
+	// release the waiters so they fail fast instead of blocking on slurpCh.
+	abort := func() {
+		x.slurpersMu.Lock()
+		delete(x.slurpers, cacheKeyStr)
+		x.slurpersMu.Unlock()
+		close(slurpCh)
+	}
+
+	// assuming 1min is enough to download a file.
+	// this may be too short for large values of x.MaxFileSize.
+	// TODO: compute ctx based on x.MaxFileSize?
+	ctx, cancelHTTP := context.WithTimeout(context.Background(), time.Minute)
+	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+	if err != nil {
+		cancelHTTP()
+		abort()
+		return nil, err
+	}
+	resp, err := x.httpClient().Do(req)
+	if err != nil {
+		cancelHTTP()
+		abort()
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close()
+		cancelHTTP()
+		abort()
+		if resp.StatusCode == http.StatusNotFound {
+			return nil, ErrNotFound
+		}
+		return nil, fmt.Errorf("bad HTTP response %s: %s", url, resp.Status)
+	}
+	ctype := resp.Header.Get("Content-Type")
+	if ctype == "" {
+		// TODO: sniff using mime magic bytes?
+		ctype = "application/octet-stream"
+	}
+	// rout is returned to the caller, wout is tee'ed from resp.Body.
+	// if the caller closes rout, tee'ing to wout also stops.
+	rout, wout := io.Pipe()
+	go func() {
+		defer func() {
+			resp.Body.Close()
+			wout.Close()
+			cancelHTTP()
+			close(slurpCh)
+			x.slurpersMu.Lock()
+			delete(x.slurpers, cacheKeyStr)
+			x.slurpersMu.Unlock()
+		}()
+		// the std io.TeeReader wouldn't work since it reports errors on reads
+		// from tee as soon as writes to wout fail which is the case if the caller
+		// closes rout.
+		tee := SinkTeeReader(HardLimitReader(resp.Body, x.maxFileSize()), wout)
+		if err := x.Cache.PutStream(ctx, cacheKey, ctype, tee); err != nil {
+			log.Printf("cache.putstream %s: %v", cacheKey, err)
+			// TODO: don't close; io.copy(wout, resp.body) here on cache failures?
+		}
+	}()
+	return &DataStream{ContentType: ctype, r: rout}, nil
+}
+
+// expandMeta arg values for verifyEventLink
+const (
+	verifyExpandMeta = true
+	verifyNoMeta     = false
+)
+
+// verifyEventLink checks whether link URL is in a nostr event's content,
+// or one of OGP link preview URLs if expandMeta is true.
+func (x *Noxer) verifyEventLink(ctx context.Context, eventID, relayURL, link string, expandMeta bool) error {
+	if !x.whitelistedRelay(relayURL) {
+		return ErrUnsupportedRelay
+	}
+	eventURLs, err := x.fetchEventURLs(ctx, eventID, relayURL)
+	if err != nil {
+		return err
+	}
+	log.Printf("fetched event URLs: %q", eventURLs)
+	for _, u := range eventURLs {
+		if u == link {
+			return nil
+		}
+	}
+	if !expandMeta {
+		return ErrNotFound
+	}
+
+	// link not found in the event text/json.
+	// check URLs in OGP metadata for each suitable link found in the event.
+	for _, urlStr := range eventURLs {
+		u, err := url.Parse(urlStr)
+		if err != nil {
+			continue // invalid url
+		}
+		if ext := path.Ext(u.Path); ext != "" {
+			if !strings.HasSuffix(ext, "html") && !strings.HasSuffix(ext, "htm") {
+				continue // assume not an html page
+			}
+		}
+		meta, err := x.slurpLinkMeta(ctx, urlStr)
+		if err != nil {
+			log.Printf("verifyEventLink slurpLinkMeta(%s): %v", u, err)
+			continue
+		}
+		for _, imgURL := range meta.ImageURLs {
+			if imgURL == link {
+				return nil
+			}
+		}
+	}
+	return ErrNotFound
+}
+
+// fetchEventURLs returns all URLs found in a nostr event.
+// it assumes the relay URL is already checked to match x.KnownRelays.
+func (x *Noxer) fetchEventURLs(ctx context.Context, eventID, relayURL string) ([]string, error) {
+	// check whether there's an ongoing fetch. if so, wait and use cache or fail.
+	cacheKey := MakeCacheKey(eventID, CacheKeyEvent)
+	cacheKeyStr := cacheKey.Path()
+	x.slurpersMu.Lock()
+	slurpCh, found := x.slurpers[cacheKeyStr]
+	if found {
+		// a previous call is already fetching.
+		// wait 'till they're done, then get it from cache or fail.
+		x.slurpersMu.Unlock()
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-slurpCh:
+			var urls []string
+			err := x.Cache.GetJSON(ctx, cacheKey, &urls)
+			return urls, err
+		}
+	} else {
+		// same reasoning as in detachedSlurpData.
+		// wouldn't need this branch if close(slurpCh) was done after x.slurpersMu.Lock()
+		// in the goroutine below. but it's too easy to miss in future code changes.
+		// checking cache one more time here is most likely insignificant when compared to
+		// opening a websocket to a nostr relay.
+		var urls []string
+		if err := x.Cache.GetJSON(ctx, cacheKey, &urls); err == nil {
+			x.slurpersMu.Unlock()
+			return urls, nil
+		}
+	}
+
+	// no other goroutine is fetching; do it now and make others wait on slurpCh.
+	slurpCh = x.makeSlurperChan(cacheKeyStr)
+	x.slurpersMu.Unlock()
+	defer func() {
+		close(slurpCh)
+		x.slurpersMu.Lock()
+		delete(x.slurpers, cacheKeyStr)
+		x.slurpersMu.Unlock()
+	}()
+
+	event, err := x.fetchNostrEvent(ctx, eventID, relayURL)
+	if err != nil {
+		return nil, err
+	}
+	var eventURLs []string
+	switch event.Kind {
+	default:
+		return nil, ErrUnsupportedEventKind
+	case nostr.KindTextNote, nostr.KindChannelMessage:
+		eventURLs = extractAcceptableURLs(event.Content)
+	case nostr.KindSetMetadata, nostr.KindChannelCreation, nostr.KindChannelMetadata:
+		var p struct{ Picture string }
+		if err := json.Unmarshal([]byte(event.Content), &p); err != nil {
+			return nil, err
+		}
+		if validURL(p.Picture) {
+			eventURLs = append(eventURLs, p.Picture)
+		}
+	}
+
+	if err := x.Cache.PutJSON(ctx, cacheKey, eventURLs); err != nil {
+		log.Printf("cache.putjson %s: %v", cacheKey, err)
+	}
+	return eventURLs, nil
+}
+
+// assuming relay is whitelisted
+func (x *Noxer) fetchNostrEvent(ctx context.Context, eventID, relayURL string) (*nostr.Event, error) {
+	relay, err := x.relayConn(ctx, relayURL)
+	if err != nil {
+		return nil, err
+	}
+
+	var (
+		event    *nostr.Event
+		fetchErr error
+	)
+	// assuming 10sec is more than enough for a simple filter'ed sub with a single
+	// event ID.
+	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
+	defer cancel()
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		f := nostr.Filter{IDs: []string{eventID}, Limit: 1}
+		sub := relay.Subscribe(nostr.Filters{f})
+		defer sub.Unsub()
+		select {
+		case e := <-sub.Events:
+			// e.CheckSignature() is already done by the client
+			event = &e
+		case <-ctx.Done():
+			fetchErr = ctx.Err()
+		}
+	}()
+
+	select {
+	case <-done:
+		return event, fetchErr
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
+}
+
+// connect to a nostr relay at relayURL or reuse an existing conn.
+// it blocks all other callers.
+func (x *Noxer) relayConn(ctx context.Context, relayURL string) (*nostr.Relay, error) {
+	// check existing conn and reuse if found
+	relayURL = nostr.NormalizeURL(relayURL)
+	x.clientsMu.Lock()
+	defer x.clientsMu.Unlock()
+	if cl, ok := x.clients[relayURL]; ok {
+		// "touch" the last used to let cleanup timer know we aren't idling
+		cl.lastUsed = time.Now()
+		return cl.relay, nil
+	}
+
+	// none found. make a new conn.
+	var (
+		relay   *nostr.Relay
+		connErr error
+	)
+	// assuming 10sec is more than enough to connect to a websocket.
+	connCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
+	defer cancel()
+	done := make(chan struct{})
+	go func() {
+		// TODO: send a patch upstream for a nostr.RelayConnectContext(ctx, url)
+		relay, connErr = nostr.RelayConnect(relayURL)
+		close(done)
+	}()
+	select {
+	case <-connCtx.Done():
+		// unfortunately, this leaves the above goroutine hanging, and will keep
+		// piling up for non-responsive relays.
+		// can be solved with a nostr.RelayConnectContext.
+		return nil, connCtx.Err()
+	case <-done:
+		if connErr != nil {
+			return nil, connErr
+		}
+	}
+	if x.clients == nil {
+		x.clients = make(map[string]*relayClient)
+	}
+	x.clients[relayURL] = &relayClient{
+		relay:    relay,
+		lastUsed: time.Now(),
+	}
+	// a self-cleanup goroutine to delete ourselves if relay reports conn errors.
+	go func() {
+		err := <-relay.ConnectionError
+		log.Printf("%s: closing due to: %v", relayURL, err)
+		x.clientsMu.Lock()
+		defer x.clientsMu.Unlock()
+		relay.Close()
+		delete(x.clients, relayURL)
+	}()
+	if x.cleanupTimer == nil {
+		x.cleanupTimer = time.AfterFunc(x.idleRelayTimeout(), x.cleanupRelayConn)
+	}
+	return relay, nil
+}
+
+// close and delete nostr relay connections idling for more than x.idleRelayTimeout().
+func (x *Noxer) cleanupRelayConn() {
+	x.clientsMu.Lock()
+	defer x.clientsMu.Unlock()
+	for url, cl := range x.clients {
+		if time.Since(cl.lastUsed) > x.idleRelayTimeout() {
+			log.Printf("closing idle conn to %s", url)
+			cl.relay.Close()
+			delete(x.clients, url)
+		}
+	}
+	if len(x.clients) > 0 {
+		x.cleanupTimer = time.AfterFunc(time.Minute, x.cleanupRelayConn)
+	} else {
+		x.cleanupTimer = nil
+	}
+}
+
+// assumes x.slurpersMu is handled by the caller.
+func (x *Noxer) makeSlurperChan(k string) chan struct{} {
+	if x.slurpers == nil {
+		x.slurpers = make(map[string]chan struct{})
+	}
+	ch := make(chan struct{})
+	x.slurpers[k] = ch
+	return ch
+}
+
+func (x *Noxer) httpClient() *http.Client {
+	if x.HTTPClient == nil {
+		return http.DefaultClient
+	}
+	return x.HTTPClient
+}
+
+func (x *Noxer) idleRelayTimeout() time.Duration {
+	if x.IdleRelayTimeout == 0 {
+		return time.Minute
+	}
+	return x.IdleRelayTimeout
+}
+
+func (x *Noxer) maxFileSize() int64 {
+	if x.MaxFileSize == 0 {
+		return 1 << 20 // 1MiB
+	}
+	return x.MaxFileSize
+}
+
+// whitelistedRelay reports whether a nostr relay at urlStr is in x.KnownRelays.
+// it expects x.KnownRelays to be sorted in lexical order.
+//
+// only hostname of urlStr is checked against x.KnownRelays.
+func (x *Noxer) whitelistedRelay(urlStr string) bool {
+	u, err := url.Parse(urlStr)
+	if err != nil {
+		return false
+	}
+	host := u.Hostname()
+	i := sort.SearchStrings(x.KnownRelays, host)
+	return i < len(x.KnownRelays) && x.KnownRelays[i] == host
+}
+
+// TODO: use oEmbed if OGP fails?
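+// parseLinkMeta decodes OGP metadata from the HTML document in r;
+// it reports ErrNotFound when the page carries no og:image entries.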
+func parseLinkMeta(r io.Reader) (*LinkMeta, error) {
+	og := opengraph.NewOpenGraph()
+	if err := og.ProcessHTML(r); err != nil {
+		return nil, err
+	}
+	if len(og.Images) == 0 {
+		return nil, ErrNotFound
+	}
+	meta := &LinkMeta{
+		Type:        og.Type,
+		Title:       og.Title,
+		Description: og.Description,
+		ImageURLs:   make([]string, 0, len(og.Images)),
+	}
+	for _, img := range og.Images {
+		u := img.SecureURL
+		if u == "" {
+			u = img.URL
+		}
+		if u == "" {
+			continue
+		}
+		meta.ImageURLs = append(meta.ImageURLs, u)
+	}
+	return meta, nil
+}
+
+// TODO: patch to extract only host/ip; no emails and such
+var urlRegexp = xurls.Relaxed()
+
+func extractAcceptableURLs(text string) []string {
+	var urls []string
+	for _, a := range urlRegexp.FindAllString(text, -1) {
+		if validURL(a) {
+			urls = append(urls, a)
+		}
+	}
+	return urls
+}
+
+func validURL(urlStr string) bool {
+	if urlStr == "" {
+		return false
+	}
+	u, err := url.Parse(urlStr)
+	if err != nil {
+		return false
+	}
+	if u.Hostname() == "" {
+		return false
+	}
+	return u.Scheme == "" || u.Scheme == "http" || u.Scheme == "https"
+}
diff --git a/noxy_test.go b/noxy_test.go
new file mode 100644
index 0000000..b15bd53
--- /dev/null
+++ b/noxy_test.go
@@ -0,0 +1,414 @@
+package noxy
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"sync"
+	"testing"
+	"testing/iotest"
+	"time"
+
+	nostr "github.com/nbd-wtf/go-nostr"
+	"golang.org/x/net/websocket"
+)
+
+func TestDetachedSlurpDataCacheMiss(t *testing.T) {
+	const contents = "text file"
+	const ctype = "text/plain;charset=utf-8"
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", ctype)
+		w.Write([]byte(contents))
+	}))
+	defer ts.Close()
+	var testURL = ts.URL + "/"
+
+	cache := DirCache{Root: t.TempDir()}
+	noxer := Noxer{Cache: cache, MaxFileSize: 1024}
+
+	for i := 1; i <= 2; i++ {
+		t.Run(fmt.Sprintf("slurp %d", i), func(t *testing.T) {
+			bgCtx := context.Background()
+			canceledCtx, cancel := context.WithCancel(bgCtx)
+			cancel() // slurp must run on a separate context
+			ds, err := noxer.detachedSlurpData(canceledCtx, testURL)
+			if err != nil {
+				t.Fatalf("noxer.detachedSlurpData: %v", err)
+			}
+			checkDataStream(t, ds, ctype, []byte(contents))
+
+			checkCachedDataFile(t, cache, testURL, []byte(contents))
+			cacheKey := MakeCacheKey(testURL, CacheKeyData)
+			cachedDS, err := cache.GetStream(bgCtx, cacheKey)
+			if err != nil {
+				t.Fatalf("cache.GetStream: %v", err)
+			}
+			checkDataStream(t, cachedDS, ctype, []byte(contents))
+
+			noxer.slurpersMu.Lock()
+			defer noxer.slurpersMu.Unlock()
+			if len(noxer.slurpers) > 0 {
+				t.Error("x.slurpers is not empty")
+			}
+		})
+	}
+}
+
+func TestDetachedSlurpDataClosedReader(t *testing.T) {
+	const ctype = "text/plain;charset=utf-8"
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", ctype)
+		w.Write([]byte("foo"))
+		time.Sleep(time.Second)
+		w.Write([]byte("bar"))
+	}))
+	defer ts.Close()
+	var testURL = ts.URL + "/"
+
+	cache := DirCache{Root: t.TempDir()}
+	noxer := Noxer{Cache: cache, MaxFileSize: 1024}
+
+	ctx := context.Background()
+	ds1, err := noxer.detachedSlurpData(ctx, testURL)
+	if err != nil {
+		t.Fatalf("noxer.detachedSlurpData 1: %v", err)
+	}
+	ds1.r.(io.Closer).Close()
+
+	cacheKey := MakeCacheKey(testURL, CacheKeyData)
+	noxer.slurpersMu.Lock()
+	ch := noxer.slurpers[cacheKey.Path()]
+	noxer.slurpersMu.Unlock()
+	select {
+	case <-time.After(3 * time.Second):
+		t.Fatal("slurp took too long")
+	case <-ch:
+	}
+
+	ds2, err := cache.GetStream(ctx, cacheKey)
+	if err != nil {
+		t.Fatalf("cache.GetStream: %v", err)
+	}
+	checkDataStream(t, ds2, ctype, []byte("foobar"))
+}
+
+func TestSlurpLinkMeta(t *testing.T) {
+	var count int
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if count > 0 {
+			w.WriteHeader(http.StatusNotFound)
+			return
+		}
+		count += 1
+		w.Header().Set("Content-Type", "text/html")
+		fmt.Fprintln(w, `
+		<html><head><meta property="og:type" content="article" />
+		<meta property="og:title" content="test title" />
+		<meta property="og:description" content="test descr" />
+		<meta property="og:image" content="http://unused:0/image.png" />
+		</head></html>
+		`)
+	}))
+	defer ts.Close()
+	var testURL = ts.URL + "/"
+
+	cache := DirCache{Root: t.TempDir()}
+	noxer := Noxer{Cache: cache, MaxFileSize: 1024}
+	meta1, err := noxer.slurpLinkMeta(context.Background(), testURL)
+	if err != nil {
+		t.Fatalf("slurpLinkMeta 1: %v", err)
+	}
+	wantMeta := &LinkMeta{
+		Type:        "article",
+		Title:       "test title",
+		Description: "test descr",
+		ImageURLs:   []string{"http://unused:0/image.png"},
+	}
+	compareLinkMeta(t, meta1, wantMeta)
+
+	// expected to be cached by now
+	meta2, err := noxer.slurpLinkMeta(context.Background(), testURL)
+	if err != nil {
+		t.Fatalf("slurpLinkMeta 2: %v", err)
+	}
+	compareLinkMeta(t, meta2, wantMeta)
+}
+
+func TestSlurpLinkMetaHTTPErr(t *testing.T) {
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	}))
+	defer ts.Close()
+	var testURL = ts.URL + "/"
+
+	noxer := Noxer{Cache: NullCache, MaxFileSize: 1024}
+	_, err := noxer.slurpLinkMeta(context.Background(), testURL)
+	if !errors.Is(err, ErrNotFound) {
+		t.Errorf("slurpLinkMeta err=%v; want ErrNotFound", err)
+	}
+}
+
+func TestVerifyEventLinkNoMeta(t *testing.T) {
+	priv := genNostrKey()
+	event := &nostr.Event{
+		CreatedAt: time.Now(),
+		Kind:      nostr.KindTextNote,
+		Content:   "text; http://unused:0/foo and http://unused:0/bar",
+		PubKey:    nostrPubKey(priv),
+	}
+	if err := event.Sign(priv); err != nil {
+		t.Fatal(err)
+	}
+
+	trelay := ServeSingleEvent(t, event)
+	defer trelay.Close()
+	t.Logf("fake relay URL: %s", trelay.URL)
+
+	noxer := Noxer{
+		Cache:            DirCache{Root: t.TempDir()},
+		MaxFileSize:      1024,
+		KnownRelays:      []string{"127.0.0.1"},
+		IdleRelayTimeout: time.Minute,
+	}
+	tt := []struct {
+		url    string
+		wantOK bool
+	}{
+		{"http://unused:0/foo", true},
+		{"http://unused:0/bar", true},
+		{"http://unused:0/", false},
+		{"http://example.org", false},
+	}
+	for _, tc := range tt {
+		t.Run(tc.url, func(t *testing.T) {
+			ctx := context.Background()
+			err := noxer.verifyEventLink(ctx, event.ID, trelay.URL, tc.url, verifyNoMeta)
+			switch {
+			case tc.wantOK && err != nil:
+				t.Errorf("verifyEventLink: %v", err)
+			case !tc.wantOK && err == nil:
+				t.Error("verifyEventLink returned nil error")
+			}
+		})
+	}
+
+	if subs := trelay.OpenSubs(); len(subs) > 0 {
+		t.Errorf("trelay.OpenSubs is not empty: %q", subs)
+	}
+}
+
+func TestFetchMetaAndStreamData(t *testing.T) {
+	var website *httptest.Server
+	website = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		default:
+			w.WriteHeader(http.StatusBadRequest)
+			t.Errorf("%s %s", r.Method, r.URL)
+		case "/":
+			w.Header().Set("Content-Type", "text/html")
+			fmt.Fprintf(w, `
+			<html><head><meta property="og:image" content="%s/image.png" /></head></html>
+			`, website.URL)
+		case "/image.png":
+			w.Header().Set("Content-Type", "image/png")
+			w.Write([]byte{1, 2, 3})
+		}
+	}))
+	defer website.Close()
+	websiteRootURL := website.URL + "/"
+	websiteImageURL := website.URL + "/image.png"
+
+	priv := genNostrKey()
+	event := &nostr.Event{
+		CreatedAt: time.Now(),
+		Kind:      nostr.KindTextNote,
+		Content:   fmt.Sprintf("link to an html page with image: %s", websiteRootURL),
+		PubKey:    nostrPubKey(priv),
+	}
+	if err := event.Sign(priv); err != nil {
+		t.Fatal(err)
+	}
+	trelay := ServeSingleEvent(t, event)
+	defer trelay.Close()
+
+	cache := DirCache{Root: t.TempDir()}
+	noxer := Noxer{
+		Cache:            cache,
+		MaxFileSize:      1024,
+		KnownRelays:      []string{"127.0.0.1"},
+		IdleRelayTimeout: time.Minute,
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	meta, err := noxer.FetchLinkMeta(ctx, event.ID, trelay.URL, websiteRootURL)
+	if err != nil {
+		t.Fatalf("FetchLinkMeta(%s): %v", websiteRootURL, err)
+	}
+	var cachedMeta LinkMeta
+	if err := cache.GetJSON(ctx, MakeCacheKey(websiteRootURL, CacheKeyURLPreview), &cachedMeta); err != nil {
+		t.Fatalf("cache.getjson: %v", err)
+	}
+	compareLinkMeta(t, meta, &cachedMeta)
+
+	ds, err := noxer.StreamLinkData(ctx, event.ID, trelay.URL, websiteImageURL)
+	if err != nil {
+		t.Fatalf("StreamLinkData(%s): %v", websiteImageURL, err)
+	}
+	checkDataStream(t, ds, "image/png", []byte{1, 2, 3})
+	checkCachedDataFile(t, cache, websiteImageURL, []byte{1, 2, 3})
+}
+
+func checkDataStream(t *testing.T, ds *DataStream, ctype string, contents []byte) {
+	t.Helper()
+	if err := iotest.TestReader(ds, contents); err != nil {
+		t.Errorf("data stream reader: %v", err)
+	}
+	if ds.ContentType != ctype {
+		t.Errorf("ds.ContentType = %q; want %q", ds.ContentType, ctype)
+	}
+}
+
+func checkCachedDataFile(t *testing.T, cache DirCache, origURL string, contents []byte) {
+	t.Helper()
+	cacheKey := MakeCacheKey(origURL, CacheKeyData)
+	b, err := os.ReadFile(cache.makeFilepath(cacheKey, false))
+	if err != nil {
+		t.Errorf("cache file read: %v", err)
+	}
+	if !bytes.Equal(b, contents) {
+		t.Errorf("cached bytes = %q; want %q", b, contents)
+	}
+}
+
+func compareLinkMeta(t *testing.T, actual, expected *LinkMeta) {
+	t.Helper()
+	if actual.Type != expected.Type {
+		t.Errorf("actual.Type = %q; want %q", actual.Type, expected.Type)
+	}
+	if actual.Title != expected.Title {
+		t.Errorf("actual.Title = %q; want %q", actual.Title, expected.Title)
+	}
+	if actual.Description != expected.Description {
+		t.Errorf("actual.Description = %q; want %q", actual.Description, expected.Description)
+	}
+	if len(actual.ImageURLs) != 1 || actual.ImageURLs[0] != expected.ImageURLs[0] {
+		t.Errorf("actual.ImageURLs = %q; want %q", actual.ImageURLs, expected.ImageURLs)
+	}
+}
+
+func genNostrKey() string {
+	k := nostr.GeneratePrivateKey()
+	if k == "" {
+		panic("nostr.GeneratePrivateKey returned empty string")
+	}
+	return k
+}
+
+func nostrPubKey(priv string) string {
+	pub, err := nostr.GetPublicKey(priv)
+	if err != nil {
+		panic(err.Error())
+	}
+	return pub
+}
+
+type FakeNostrRelay struct {
+	Event *nostr.Event
+
+	URL        string
+	HTTPServer *httptest.Server
+
+	Mu   sync.Mutex
+	Subs map[string]bool // id => true if still active; false for unsub'ed IDs
+}
+
+func (nr *FakeNostrRelay) Close() {
+	nr.HTTPServer.Close()
+}
+
+func (nr *FakeNostrRelay) OpenSubs() []string {
+	nr.Mu.Lock()
+	defer nr.Mu.Unlock()
+	var a []string
+	for k, open := range nr.Subs {
+		if open {
+			a = append(a, k)
+		}
+	}
+	return a
+}
+
+func nostrHandler(t *testing.T, nr *FakeNostrRelay) func(*websocket.Conn) {
+	return func(conn *websocket.Conn) {
+		for {
+			var req [3]any
+			if err := websocket.JSON.Receive(conn, &req); err != nil {
+				conn.Close()
+				return
+			}
+			switch req[0].(string) {
+			default:
+				t.Errorf("ws handler req[0]=%q; want REQ or CLOSE", req[0])
+				conn.Close()
+				return
+			case "CLOSE":
+				nr.Mu.Lock()
+				defer nr.Mu.Unlock()
+				nr.Subs[req[1].(string)] = false
+				return
+			case "REQ":
+				subid := req[1].(string)
+				nr.Mu.Lock()
+				nr.Subs[subid] = true
+				nr.Mu.Unlock()
+
+				filters := req[2].(map[string]any)
+				t.Logf("ws handler sub=%q, filters=%s", subid, filters)
+				if ids := filters["ids"].([]any); len(ids) != 1 || ids[0].(string) != nr.Event.ID {
+					t.Errorf("ws handler REQ filter ids=%q; want [%q]", ids, []string{nr.Event.ID})
+				}
+				if limit := filters["limit"].(float64); math.Abs(limit-1) > 0.00001 {
+					t.Errorf("ws handler REQ limit=%f; want 1", limit)
+				}
+				b, err := json.Marshal(nr.Event)
+				if err != nil {
+					t.Errorf("json.Marshal: %v", err)
+					conn.Close()
+					return
+				}
+				resp := fmt.Sprintf(`["EVENT", %q, %s]`, subid, b)
+				t.Logf("ws handler resp: %s", resp)
+				if err := websocket.Message.Send(conn, resp); err != nil {
+					t.Errorf("ws handler REQ write: %v", err)
+				}
+			}
+		}
+	}
+}
+
+func ServeSingleEvent(t *testing.T, event *nostr.Event) *FakeNostrRelay {
+	relay := &FakeNostrRelay{
+		Event: event,
+		Subs:  make(map[string]bool),
+	}
+	relay.HTTPServer = httptest.NewServer(&websocket.Server{
+		Handshake: func(conf *websocket.Config, r *http.Request) error {
+			t.Logf("new handshake from %s", r.RemoteAddr)
+			return nil
+		},
+		Handler: nostrHandler(t, relay),
+	})
+	tsurl, err := url.Parse(relay.HTTPServer.URL)
+	if err != nil {
+		panic(err)
+	}
+	relay.URL = fmt.Sprintf("ws://%s/", tsurl.Host)
+	return relay
+}
diff --git a/tools/release.sh b/tools/release.sh
new file mode 100755
index 0000000..894e4c5
--- /dev/null
+++ b/tools/release.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -e
+
+VERSION=${VERSION:-$(git describe --tags)}
+export CGO_ENABLED=0
+exec go build -ldflags "-s -w -X main.noxyVersion=$VERSION" -buildmode=pie -trimpath ./cmd/noxy/
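+
+# a usage sketch; the tag below is only an example:
+#
+#   ./tools/release.sh                 # version taken from `git describe --tags`
+#   VERSION=v0.0.1 ./tools/release.sh  # explicit version override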