From 9fbfb0b3931117cfcf055e6f694f3500f6d1dd62 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 8 Mar 2022 02:16:10 +0100 Subject: [PATCH] feat(gw): ?format= or Accept HTTP header - extracted file-like content type responses to separate .go files - Accept HTTP header with support for application/vnd.ipld.* types (TBD, we did not register them yet, so for illustration purpose only) --- core/corehttp/gateway_handler.go | 206 ++++++------------------- core/corehttp/gateway_handler_block.go | 37 +++++ core/corehttp/gateway_handler_car.go | 59 +++++++ core/corehttp/gateway_handler_file.go | 81 ++++++++++ 4 files changed, 220 insertions(+), 163 deletions(-) create mode 100644 core/corehttp/gateway_handler_block.go create mode 100644 core/corehttp/gateway_handler_car.go create mode 100644 core/corehttp/gateway_handler_file.go diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index f46b6379e6c..0973e42cf9e 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -1,13 +1,10 @@ package corehttp import ( - "bytes" "context" "fmt" "html/template" "io" - "io/ioutil" - "mime" "net/http" "net/url" "os" @@ -19,8 +16,6 @@ import ( "time" humanize "github.com/dustin/go-humanize" - "github.com/gabriel-vasile/mimetype" - blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" assets "github.com/ipfs/go-ipfs/assets" @@ -30,8 +25,6 @@ import ( "github.com/ipfs/go-path/resolver" coreiface "github.com/ipfs/interface-go-ipfs-core" ipath "github.com/ipfs/interface-go-ipfs-core/path" - gocar "github.com/ipld/go-car" - selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" routing "github.com/libp2p/go-libp2p-core/routing" prometheus "github.com/prometheus/client_golang/prometheus" ) @@ -312,24 +305,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - // Support custom response format via explicit override in URL - 
if responseFormat := r.URL.Query().Get("format"); responseFormat != "" { - switch responseFormat { - case "block": - logger.Debugw("serving raw block", "path", parsedPath) - i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) - return - case "car": - logger.Debugw("serving car", "path", parsedPath) - i.serveCar(w, r, resolvedPath.Cid(), parsedPath) - return - default: - err := fmt.Errorf("unsupported format %q", responseFormat) - webError(w, "failed to parse request format", err, http.StatusBadRequest) - return - } - } - // HTTP Headers i.addUserHeaders(w) // ok, _now_ write user's headers. w.Header().Set("X-Ipfs-Path", urlPath) @@ -341,6 +316,32 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } + // Support custom response formats passed via ?format or Accept HTTP header + if contentType := getExplicitContentType(r); contentType != "" { + switch contentType { + case "application/vnd.ipld.raw": + logger.Debugw("serving raw block", "path", parsedPath) + i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) + return + case "application/vnd.ipld.car": + logger.Debugw("serving car stream", "path", parsedPath) + i.serveCar(w, r, resolvedPath.Cid(), parsedPath) + return + case "application/vnd.ipld.car; version=1": + logger.Debugw("serving car stream", "path", parsedPath) + i.serveCar(w, r, resolvedPath.Cid(), parsedPath) + return + case "application/vnd.ipld.car; version=2": // no CARv2 in go-ipfs atm + err := fmt.Errorf("unsupported CARv2 format, try again with CARv1") + webError(w, "failed respond with requested content type", err, http.StatusBadRequest) + return + default: + err := fmt.Errorf("unsupported format %q", contentType) + webError(w, "failed respond with requested content type", err, http.StatusBadRequest) + return + } + } + // Handling Unixfs dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) if err != nil { @@ -528,144 +529,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r 
*http.Request } } -// serveFile returns data behind a file along with HTTP headers based on -// the file itself, its CID and the contentPath used for accessing it. -func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File) { - - // Set Cache-Control and read optional Last-Modified time - modtime := addCacheControlHeaders(w, r, contentPath, fileCid) - - // Set Content-Disposition - name := addContentDispositionHeader(w, r, contentPath) - - // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) - size, err := file.Size() - if err != nil { - http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) - return - } - - // Lazy seeker enables efficient range-requests and HTTP HEAD responses - content := &lazySeeker{ - size: size, - reader: file, - } - - // Calculate deterministic value for Content-Type HTTP header - // (we prefer to do it here, rather than using implicit sniffing in http.ServeContent) - var ctype string - if _, isSymlink := file.(*files.Symlink); isSymlink { - // We should be smarter about resolving symlinks but this is the - // "most correct" we can be without doing that. - ctype = "inode/symlink" - } else { - ctype = mime.TypeByExtension(gopath.Ext(name)) - if ctype == "" { - // uses https://github.com/gabriel-vasile/mimetype library to determine the content type. - // Fixes https://github.com/ipfs/go-ipfs/issues/7252 - mimeType, err := mimetype.DetectReader(content) - if err != nil { - http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError) - return - } - - ctype = mimeType.String() - _, err = content.Seek(0, io.SeekStart) - if err != nil { - http.Error(w, "seeker can't seek", http.StatusInternalServerError) - return - } - } - // Strip the encoding from the HTML Content-Type header and let the - // browser figure it out. 
- // - // Fixes https://github.com/ipfs/go-ipfs/issues/2203 - if strings.HasPrefix(ctype, "text/html;") { - ctype = "text/html" - } - } - // Setting explicit Content-Type to avoid mime-type sniffing on the client - // (unifies behavior across gateways and web browsers) - w.Header().Set("Content-Type", ctype) - - // special fixup around redirects - w = &statusResponseWriter{w} - - http.ServeContent(w, r, name, modtime, content) -} - -func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { - blockReader, err := i.api.Block().Get(r.Context(), contentPath) - if err != nil { - webError(w, "failed to get block", err, http.StatusInternalServerError) - return - } - block, err := ioutil.ReadAll(blockReader) - if err != nil { - webError(w, "failed to read block", err, http.StatusInternalServerError) - return - } - content := bytes.NewReader(block) - - // Set Content-Disposition - name := blockCid.String() + ".ipfs.block" - setContentDispositionHeader(w, name, "attachment") - - // Set remaining headers - modtime := addCacheControlHeaders(w, r, contentPath, blockCid) - w.Header().Set("Content-Type", "application/octet-stream") - w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) - - // Done: http.ServeContent will take care of Content-Length and range requests - http.ServeContent(w, r, name, modtime, content) -} - -func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { - ctx := r.Context() - - // Set Content-Disposition - name := rootCid.String() + ".ipfs.car" - setContentDispositionHeader(w, name, "attachment") - - // Set remaining headers - /* TODO modtime := addCacheControlHeaders(w, r, contentPath, rootCid) - - how does cache-control look like, given car can fail mid-stream? 
- - we don't want clients to cache partial/interrupted CAR - - we may document that client should verify that all blocks were dowloaded, - or we may leverage content-length to hint something went wrong - */ - - /* TODO: content-length (so user agents show % of remaining download) - - introduce max-car-size limit in go-ipfs-config and pre-compute CAR first, and then get size and use lazySeeker? - - are we able to provide length for Unixfs DAGs? (CumulativeSize+CARv0 header+envelopes) - */ - - w.Header().Set("Content-Type", "application/octet-stream") - w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) - - // Same go-car settings as dag.export command - store := dagStore{dag: i.api.Dag(), ctx: ctx} - dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively} - car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce()) - - w.Header().Set("Transfer-Encoding", "chunked") - w.WriteHeader(http.StatusOK) - - if err := car.Write(w); err != nil { - // TODO: can we do any error handling here? 
- } -} - -type dagStore struct { - dag coreiface.APIDagService - ctx context.Context -} - -func (ds dagStore) Get(c cid.Cid) (blocks.Block, error) { - obj, err := ds.dag.Get(ds.ctx, c) - return obj, err -} - func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool { resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath) if err != nil { @@ -1023,6 +886,23 @@ func getFilename(contentPath ipath.Path) string { return gopath.Base(s) } +// return explicit response format if specified in request as query parameter or via Accept HTTP header +func getExplicitContentType(r *http.Request) string { + if formatParam := r.URL.Query().Get("format"); formatParam != "" { + // translate query param to a content type + switch formatParam { + case "raw": + return "application/vnd.ipld.raw" + case "car": + return "application/vnd.ipld.car" + } + } + if accept := r.Header.Get("Accept"); strings.HasPrefix(accept, "application/vnd.") { + return accept + } + return "" +} + func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, parsedPath ipath.Path) (ipath.Resolved, string, error) { filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) if err != nil { diff --git a/core/corehttp/gateway_handler_block.go b/core/corehttp/gateway_handler_block.go new file mode 100644 index 00000000000..9264e6875a4 --- /dev/null +++ b/core/corehttp/gateway_handler_block.go @@ -0,0 +1,37 @@ +package corehttp + +import ( + "bytes" + "io/ioutil" + "net/http" + + cid "github.com/ipfs/go-cid" + ipath "github.com/ipfs/interface-go-ipfs-core/path" +) + +// serveRawBlock returns bytes behind a raw block +func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { + blockReader, err := i.api.Block().Get(r.Context(), contentPath) + if err != nil { + webError(w, "failed to get block", err, http.StatusInternalServerError) + return + } + block, err := 
ioutil.ReadAll(blockReader) + if err != nil { + webError(w, "failed to read block", err, http.StatusInternalServerError) + return + } + content := bytes.NewReader(block) + + // Set Content-Disposition + name := blockCid.String() + ".raw" + setContentDispositionHeader(w, name, "attachment") + + // Set remaining headers + modtime := addCacheControlHeaders(w, r, contentPath, blockCid) + w.Header().Set("Content-Type", "application/vnd.ipld.raw") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Done: http.ServeContent will take care of Content-Length and range requests + http.ServeContent(w, r, name, modtime, content) +} diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go new file mode 100644 index 00000000000..5702e86b01a --- /dev/null +++ b/core/corehttp/gateway_handler_car.go @@ -0,0 +1,59 @@ +package corehttp + +import ( + "context" + "net/http" + + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + coreiface "github.com/ipfs/interface-go-ipfs-core" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + gocar "github.com/ipld/go-car" + selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" +) + +// serveCar returns a CAR stream for specific DAG+selector +func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { + ctx := r.Context() + + // Set Content-Disposition + name := rootCid.String() + ".car" + setContentDispositionHeader(w, name, "attachment") + + // Set remaining headers + /* TODO modtime := addCacheControlHeaders(w, r, contentPath, rootCid) + - how does cache-control look like, given car can fail mid-stream? 
+ - we don't want clients to cache partial/interrupted CAR + - we may document that client should verify that all blocks were downloaded, + or we may leverage content-length to hint something went wrong + */ + + /* TODO: content-length (so user agents show % of remaining download) + - introduce max-car-size limit in go-ipfs-config and pre-compute CAR first, and then get size and use lazySeeker? + - are we able to provide length for Unixfs DAGs? (CumulativeSize+CARv0 header+envelopes) + */ + + w.Header().Set("Content-Type", "application/vnd.ipld.car; version=1") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Same go-car settings as dag.export command + store := dagStore{dag: i.api.Dag(), ctx: ctx} + dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively} + car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce()) + + w.WriteHeader(http.StatusOK) + + if err := car.Write(w); err != nil { + // TODO: can we do any error handling here? + } +} + +type dagStore struct { + dag coreiface.APIDagService + ctx context.Context +} + +func (ds dagStore) Get(c cid.Cid) (blocks.Block, error) { + obj, err := ds.dag.Get(ds.ctx, c) + return obj, err +} diff --git a/core/corehttp/gateway_handler_file.go b/core/corehttp/gateway_handler_file.go new file mode 100644 index 00000000000..4ca85565f0c --- /dev/null +++ b/core/corehttp/gateway_handler_file.go @@ -0,0 +1,81 @@ +package corehttp + +import ( + "fmt" + "io" + "mime" + "net/http" + gopath "path" + "strings" + + "github.com/gabriel-vasile/mimetype" + cid "github.com/ipfs/go-cid" + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" +) + +// serveFile returns data behind a file along with HTTP headers based on +// the file itself, its CID and the contentPath used for accessing it. 
+func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File) { + + // Set Cache-Control and read optional Last-Modified time + modtime := addCacheControlHeaders(w, r, contentPath, fileCid) + + // Set Content-Disposition + name := addContentDispositionHeader(w, r, contentPath) + + // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) + size, err := file.Size() + if err != nil { + http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) + return + } + + // Lazy seeker enables efficient range-requests and HTTP HEAD responses + content := &lazySeeker{ + size: size, + reader: file, + } + + // Calculate deterministic value for Content-Type HTTP header + // (we prefer to do it here, rather than using implicit sniffing in http.ServeContent) + var ctype string + if _, isSymlink := file.(*files.Symlink); isSymlink { + // We should be smarter about resolving symlinks but this is the + // "most correct" we can be without doing that. + ctype = "inode/symlink" + } else { + ctype = mime.TypeByExtension(gopath.Ext(name)) + if ctype == "" { + // uses https://github.com/gabriel-vasile/mimetype library to determine the content type. + // Fixes https://github.com/ipfs/go-ipfs/issues/7252 + mimeType, err := mimetype.DetectReader(content) + if err != nil { + http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError) + return + } + + ctype = mimeType.String() + _, err = content.Seek(0, io.SeekStart) + if err != nil { + http.Error(w, "seeker can't seek", http.StatusInternalServerError) + return + } + } + // Strip the encoding from the HTML Content-Type header and let the + // browser figure it out. 
+ // + // Fixes https://github.com/ipfs/go-ipfs/issues/2203 + if strings.HasPrefix(ctype, "text/html;") { + ctype = "text/html" + } + } + // Setting explicit Content-Type to avoid mime-type sniffing on the client + // (unifies behavior across gateways and web browsers) + w.Header().Set("Content-Type", ctype) + + // special fixup around redirects + w = &statusResponseWriter{w} + + http.ServeContent(w, r, name, modtime, content) +}