Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gateway): JSON and CBOR response formats (IPIP-328) #9335

Merged
merged 39 commits into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
ac8f9b1
wip: play with dag-cbor and dag-json
hacdias Oct 6, 2022
3dce012
wip: add application/json and application/cbor
hacdias Oct 7, 2022
b557181
fix: go cases don't flow automatically :)
hacdias Oct 10, 2022
4104bb7
test: add some dag-json and dag-cbor tests
hacdias Oct 10, 2022
25893c5
test: improve names
hacdias Oct 10, 2022
e621e64
feat: allow json and cbor data types too
hacdias Oct 11, 2022
44946ed
refactor: avoid encoding things that are already on their right encoding
hacdias Oct 13, 2022
199ab42
fix: remove responseFormat from logging
hacdias Oct 13, 2022
89eb033
refactor: simplify serveCodec to use serveRawBlock iff data encoded i…
hacdias Oct 13, 2022
fb50869
tests: rename current tests to indicate they're unixfs only
hacdias Oct 13, 2022
fc31241
refactor: do not use serveRawBlock inside serveCodec bc headers and o…
hacdias Oct 13, 2022
55383cd
test: add test with pure json and cbor
hacdias Oct 17, 2022
a6d45c7
test: convert cbor <-> json
hacdias Oct 17, 2022
1986be1
test: path traversal and dag-pb output
hacdias Oct 19, 2022
9ef022e
fix: add more info about errors
hacdias Oct 20, 2022
cadc681
fix: add missing traversal
hacdias Oct 20, 2022
6331695
Merge branch 'master' into feat/8823
hacdias Nov 10, 2022
2c93672
fix: remove duplicate variable
hacdias Nov 10, 2022
462c71b
Merge branch 'master' into feat/8823
hacdias Nov 11, 2022
1e844c5
refactor: do not support traversal
hacdias Nov 11, 2022
bb98041
Update core/corehttp/gateway_handler_codec.go
hacdias Nov 15, 2022
53d5878
improve PR to match spec
hacdias Nov 16, 2022
4064f97
Merge branch 'master' into feat/8823
hacdias Nov 16, 2022
8c6a8da
feat: little web page
hacdias Nov 16, 2022
b5e5ff2
feat: update doc
hacdias Nov 16, 2022
8ca2a52
fix: Content-Disposition .json and .cbor
lidel Nov 23, 2022
b4dfa66
fix: inline disposition for JSON responses
lidel Nov 23, 2022
83913c7
refactor: return 501 for unsupported pathing
lidel Nov 23, 2022
ff55745
docs(cbor): improved info about codec
lidel Nov 23, 2022
52711d3
refactor: create template at assets/dag-index-html
lidel Nov 24, 2022
7e84856
fix(dag@gw): content type and cache headers
lidel Nov 25, 2022
3b89f20
Merge branch 'master' into feat/8823
hacdias Nov 28, 2022
162f435
add changelog info
hacdias Nov 28, 2022
12d0d7f
fix title
hacdias Nov 28, 2022
2d8ba78
rm wild block
hacdias Nov 30, 2022
b5874e7
Merge branch 'master' into feat/8823
hacdias Dec 5, 2022
0c08a76
Merge branch 'master' into feat/8823
hacdias Dec 5, 2022
f084f09
fix(dag-index-html): remove technical jargon
lidel Dec 5, 2022
32bcd41
Merge branch 'master' into feat/8823
lidel Dec 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
30 changes: 26 additions & 4 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
coreiface "github.com/ipfs/interface-go-ipfs-core"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
routing "github.com/libp2p/go-libp2p/core/routing"
mc "github.com/multiformats/go-multicodec"
prometheus "github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
Expand Down Expand Up @@ -417,9 +418,15 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "": // The implicit response format is UnixFS
logger.Debugw("serving unixfs", "path", contentPath)
i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
case "":
switch resolvedPath.Cid().Prefix().Codec {
case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor):
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
default:
logger.Debugw("serving unixfs", "path", contentPath)
i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
}
return
case "application/vnd.ipld.raw":
logger.Debugw("serving raw block", "path", contentPath)
Expand All @@ -434,6 +441,11 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
return
case "application/json", "application/vnd.ipld.dag-json",
"application/cbor", "application/vnd.ipld.dag-cbor":
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
return
default: // catch-all for unsupported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
Expand Down Expand Up @@ -866,6 +878,14 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
case "dag-json":
return "application/vnd.ipld.dag-json", nil, nil
case "json":
return "application/json", nil, nil
case "dag-cbor":
return "application/vnd.ipld.dag-cbor", nil, nil
case "cbor":
return "application/cbor", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
Expand All @@ -874,7 +894,9 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
for _, accept := range r.Header.Values("Accept") {
// respond to the very first ipld content type
if strings.HasPrefix(accept, "application/vnd.ipld") ||
strings.HasPrefix(accept, "application/x-tar") {
strings.HasPrefix(accept, "application/x-tar") ||
strings.HasPrefix(accept, "application/json") ||
strings.HasPrefix(accept, "application/cbor") {
mediatype, params, err := mime.ParseMediaType(accept)
if err != nil {
return "", nil, err
Expand Down
237 changes: 237 additions & 0 deletions core/corehttp/gateway_handler_codec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
package corehttp

import (
"bytes"
"context"
"fmt"
"html"
"io"
"net/http"
"strings"
"time"

ipldlegacy "github.com/ipfs/go-ipld-legacy"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"github.com/ipfs/kubo/tracing"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/multicodec"
mc "github.com/multiformats/go-multicodec"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
)

// codecToContentType gives, for every IPLD codec this handler can serve
// natively, the Content-Type used when a block of that codec is returned
// without an explicitly requested format.
var codecToContentType = map[uint64]string{
	uint64(mc.DagJson): "application/vnd.ipld.dag-json",
	uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor",
	uint64(mc.Json):    "application/json",
	uint64(mc.Cbor):    "application/cbor",
}

// contentTypeToCodecs lists, per requestable Content-Type, the codecs whose
// stored bytes already satisfy that type. A block in any listed codec can be
// streamed as-is; a block in any other codec is re-encoded using the LAST
// codec of the list, so the order inside each slice is significant.
var contentTypeToCodecs = map[string][]uint64{
	"application/vnd.ipld.dag-json": {uint64(mc.DagJson)},
	"application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)},
	"application/json":              {uint64(mc.Json), uint64(mc.DagJson)},
	"application/cbor":              {uint64(mc.Cbor), uint64(mc.DagCbor)},
}

// contentTypeToExtension gives the file extension appended to the default
// download name (Content-Disposition) for each supported Content-Type.
// The DAG flavours share the extension of their plain counterpart.
var contentTypeToExtension = map[string]string{
	"application/vnd.ipld.dag-json": ".json",
	"application/json":              ".json",
	"application/vnd.ipld.dag-cbor": ".cbor",
	"application/cbor":              ".cbor",
}

// serveCodec answers a request for a JSON, CBOR, DAG-JSON or DAG-CBOR
// representation of the block at resolvedPath.
//
// requestedContentType is the format explicitly asked for by the client, or
// "" when none was given. In the latter case the response is picked from the
// codec of the resolved CID: an HTML page for DAG-* blocks when the Accept
// header mentions text/html, otherwise the raw block with the Content-Type
// registered for its codec. With an explicit type, "dag-" formats are always
// re-encoded, other formats are streamed raw when the block codec already
// matches and re-encoded otherwise.
//
// begin is accepted for signature parity with the other serve* handlers and
// is not used in this function.
func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) {
	ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
	defer span.End()

	// Pathing below the resolved block is not implemented: a leftover
	// remainder is answered with 501 Not Implemented.
	if remainder := resolvedPath.Remainder(); remainder != "" {
		fullPath := resolvedPath.String()
		readable := strings.TrimSuffix(fullPath, remainder)
		err := fmt.Errorf("%q of %q could not be returned: reading IPLD Kinds other than Links (CBOR Tag 42) is not implemented: try reading %q instead", remainder, fullPath, readable)
		webError(w, "unsupported pathing", err, http.StatusNotImplemented)
		return
	}

	// Implicit format: neither Accept nor ?format= selected one, so the
	// response is derived from the codec of the CID itself.
	if requestedContentType == "" {
		blockCodec := resolvedPath.Cid().Prefix().Codec

		switch blockCodec {
		case uint64(mc.DagJson), uint64(mc.DagCbor):
			if strings.Contains(r.Header.Get("Accept"), "text/html") {
				i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath)
				return
			}
		}

		nativeType, known := codecToContentType[blockCodec]
		if !known {
			// Only reachable when the caller dispatched a codec this
			// handler has no Content-Type for.
			err := fmt.Errorf("content type not found for codec: %v", blockCodec)
			webError(w, "internal error", err, http.StatusInternalServerError)
			return
		}

		i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, nativeType)
		return
	}

	// Explicit format: look up which codecs can back it.
	candidates, supported := contentTypeToCodecs[requestedContentType]
	if !supported {
		// Only reachable when the caller passed a content type this
		// handler does not know.
		err := fmt.Errorf("unsupported content type: %s", requestedContentType)
		webError(w, err.Error(), err, http.StatusInternalServerError)
		return
	}

	// "dag-" formats always go through the encoder so the content is
	// validated. For the other formats, a block already stored in one of the
	// candidate codecs is streamed untouched (serveRawBlock is not reused
	// because it sets different headers).
	if !strings.Contains(requestedContentType, "dag-") {
		blockCodec := resolvedPath.Cid().Prefix().Codec
		for _, candidate := range candidates {
			if candidate == blockCodec {
				i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, requestedContentType)
				return
			}
		}
	}

	// Everything else is converted, using the last candidate as target codec.
	target := candidates[len(candidates)-1]
	i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, requestedContentType, target)
}

// serveCodecHTML writes a small static HTML page that names the requested CID
// and the codec of that CID, and offers links to fetch the same path with
// ?format=json, ?format=raw, ?format=dag-json and ?format=dag-cbor.
//
// Only w and resolvedPath are used; ctx, r and contentPath are not referenced
// in the body. No response header is set explicitly in this function.
func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) {
// Human-readable name of the codec found in the CID prefix.
codecName := mc.Code(resolvedPath.Cid().Prefix().Codec).String()
// The two %q verbs are filled with the CID and the codec name, in that order.
body := fmt.Sprintf(`<!DOCTYPE html>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ℹ️ Pushed some changes in 0531e93 around the HTML generated here:

It looks like this:

Screen Shot 2022-11-24 at 17 34 52

We can improve it further in follow-up PRs, but it is good enough for 0.18.0-rc1.

Copy link
Member

@lidel lidel Nov 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @juliaxbow @SgtPooki – FYI this PR adds a stub/placeholder for HTML returned when a web browser asks gateways for things other than files and directories.

A Potential GUI project for 2023 would be to design a unified experience that can render both directory listings (example) and non-unixfs things like DAG-CBOR / DAG-JSON, and provide more information about DAG.
Something similar to https://explore.ipld.io (example), but less intimidating.

What would be the best place for tracking that? Should I create issue in https://github.com/ipfs/ipfs-gui?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely looks much better than the page I had 😃 Thanks for this!

Copy link
Member

@lidel lidel Dec 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed "This CID is not UnixFS" (f084f09 – suggested by @b5 during IPFS Implementers sync.

<html lang="en">
<head>
<meta charset="utf-8" />
</head>
<body>
<p>Requested CID <code>%q</code> uses <code>%q</code> codec.</p>
<ul>
<li><a href="?format=json" rel="nofollow">Preview as JSON</a> (<code>application/json</code>)</li>
<li>Download as
<ul>
<li><a href="?format=raw" rel="nofollow">Raw Block</a> (no conversion)</li>
<li><a href="?format=dag-json" rel="nofollow">DAG-JSON</a> (specs at <a href="https://ipld.io/specs/codecs/dag-json/spec/" rel="noreferrer nofollow">IPLD</a> and <a href="https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-json" rel="noreferrer nofollow">IANA</a>)</li>
<li><a href="?format=dag-cbor" rel="nofollow">DAG-CBOR</a> (specs at <a href="https://ipld.io/specs/codecs/dag-cbor/spec/" rel="noreferrer nofollow">IPLD</a> and <a href="https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-cbor" rel="noreferrer nofollow">IANA</a>)</li>
</ul>
</li>
</ul>
</body>
</html>
`, resolvedPath.Cid(), codecName)

// The write result is deliberately discarded.
_, _ = w.Write([]byte(body))
}

// serveCodecRaw streams the block behind resolvedPath without any conversion,
// labelled with contentType.
//
// The block is fetched and fully read into memory BEFORE any response header
// is set. If the fetch or read fails, the 500 error response therefore does
// not carry the success-path headers (those added by addCacheControlHeaders,
// Content-Disposition, Content-Type). This matches the ordering used by
// serveCodecConverted, which also sets headers only once the payload is
// known to be available.
//
// contentType is used both for the Content-Type header and to choose the
// Content-Disposition file name via setCodecContentDisposition.
func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, contentType string) {
	blockCid := resolvedPath.Cid()

	blockReader, err := i.api.Block().Get(ctx, resolvedPath)
	if err != nil {
		webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
		return
	}
	block, err := io.ReadAll(blockReader)
	if err != nil {
		webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError)
		return
	}
	content := bytes.NewReader(block)

	// The payload is available: it is now safe to commit to success headers.
	modtime := addCacheControlHeaders(w, r, contentPath, blockCid)
	name := setCodecContentDisposition(w, r, resolvedPath, contentType)
	w.Header().Set("Content-Type", contentType)
	w.Header().Set("X-Content-Type-Options", "nosniff")

	// ServeContent will take care of
	// If-None-Match+Etag, Content-Length and range requests.
	// Its results are deliberately discarded (best-effort write to the client).
	_, _, _ = ServeContent(w, r, name, modtime, content)
}

// serveCodecConverted loads the DAG node behind resolvedPath, re-encodes it
// with the encoder registered for codec and writes the result labelled with
// contentType.
//
// The encoded form is buffered completely before any header is set, so that a
// failure while loading or encoding is still reported as a 500 error instead
// of a truncated success response.
func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, contentType string, codec uint64) {
	dagNode, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
	if err != nil {
		webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
		return
	}

	// The encoder works on ipld.Node, which is reached through the
	// UniversalNode view of the fetched node.
	universal, isUniversal := dagNode.(ipldlegacy.UniversalNode)
	if !isUniversal {
		err = fmt.Errorf("%T is not a valid IPLD node", dagNode)
		webError(w, err.Error(), err, http.StatusInternalServerError)
		return
	}
	node := universal.(ipld.Node)

	encode, err := multicodec.LookupEncoder(codec)
	if err != nil {
		webError(w, err.Error(), err, http.StatusInternalServerError)
		return
	}

	// Encode into memory first: encoding errors must surface as an error
	// response, which is required to conform to the specification.
	var encoded bytes.Buffer
	if err := encode(node, &encoded); err != nil {
		webError(w, err.Error(), err, http.StatusInternalServerError)
		return
	}

	// Cache headers (returns the optional Last-Modified time), download name
	// and content type.
	lastModified := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
	setCodecContentDisposition(w, r, resolvedPath, contentType)
	w.Header().Set("Content-Type", contentType)
	w.Header().Set("X-Content-Type-Options", "nosniff")

	// Emit Last-Modified only for a meaningful timestamp. The condition is
	// borrowed from the standard library (net/http/server.go), since
	// serveFile cannot be used here.
	if !lastModified.IsZero() && !lastModified.Equal(unixEpochTime) {
		w.Header().Set("Last-Modified", lastModified.UTC().Format(http.TimeFormat))
	}

	// The write result is deliberately discarded.
	_, _ = w.Write(encoded.Bytes())
}

// setCodecContentDisposition sets the Content-Disposition header for a codec
// response and returns the file name it used.
//
// The name is the ?filename= query parameter when present, otherwise the CID
// followed by the extension registered for contentType (".bin" when the type
// is unknown). JSON-based types are marked "inline" so browsers can render
// them; every other type is marked "attachment".
func setCodecContentDisposition(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentType string) string {
	ext, known := contentTypeToExtension[contentType]
	if !known {
		// Should never happen.
		ext = ".bin"
	}

	// A caller-supplied file name wins over the CID-derived default.
	name := r.URL.Query().Get("filename")
	if name == "" {
		name = resolvedPath.Cid().String() + ext
	}

	// Anything that is not JSON is assumed binary / opaque bytes.
	dispType := "attachment"
	if ext == ".json" {
		dispType = "inline"
	}

	setContentDispositionHeader(w, name, dispType)
	return name
}
Loading