Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gateway): do not convert unixfs/raw into dag-* unless explicit #9565

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
switch resolvedPath.Cid().Prefix().Codec {
case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor):
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat, logger)
default:
logger.Debugw("serving unixfs", "path", contentPath)
i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
Expand All @@ -444,7 +444,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
case "application/json", "application/vnd.ipld.dag-json",
"application/cbor", "application/vnd.ipld.dag-cbor":
logger.Debugw("serving codec", "path", contentPath)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat)
i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat, logger)
return
default: // catch-all for unsupported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
Expand Down
127 changes: 66 additions & 61 deletions core/corehttp/gateway_handler_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,17 @@ import (
mc "github.com/multiformats/go-multicodec"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)

// codecToContentType maps the supported IPLD codecs to the HTTP Content
// Type they should have.
var codecToContentType = map[uint64]string{
uint64(mc.Json): "application/json",
uint64(mc.Cbor): "application/cbor",
uint64(mc.DagJson): "application/vnd.ipld.dag-json",
uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor",
}

// contentTypeToCodecs maps the HTTP Content Type to the respective
// possible codecs. If the original data is in one of those codecs,
// we stream the raw bytes. Otherwise, we encode in the last codec
// of the list.
var contentTypeToCodecs = map[string][]uint64{
"application/json": {uint64(mc.Json), uint64(mc.DagJson)},
"application/vnd.ipld.dag-json": {uint64(mc.DagJson)},
"application/cbor": {uint64(mc.Cbor), uint64(mc.DagCbor)},
"application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)},
// convertibleCodecs maps supported input codecs into supported output codecs.
//
// The key is the codec of the requested CID; the value lists every codec the
// block may be re-encoded into. serveCodec looks up the CID's codec here and
// converts only when the codec chosen for the response appears in that list.
var convertibleCodecs = map[mc.Code][]mc.Code{
// raw and dag-pb blocks can only be turned into the strict dag-* variants.
mc.Raw: {mc.DagCbor, mc.DagJson},
mc.DagPb: {mc.DagCbor, mc.DagJson},
// dag-json and dag-cbor convert between each other (and into themselves).
mc.DagJson: {mc.DagCbor, mc.DagJson},
mc.DagCbor: {mc.DagCbor, mc.DagJson},
// Plain json and cbor may target both the plain and the dag-* variants.
mc.Json: {mc.Cbor, mc.Json, mc.DagCbor, mc.DagJson},
mc.Cbor: {mc.Cbor, mc.Json, mc.DagCbor, mc.DagJson},
}

// contentTypeToExtension maps the HTTP Content Type to the respective file
Expand All @@ -52,13 +43,39 @@ var contentTypeToExtension = map[string]string{
"application/vnd.ipld.dag-cbor": ".cbor",
}

func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) {
// getResponseContentTypeAndCodec picks the content type and codec used for
// the response. An explicitly requested content type that is supported always
// wins; otherwise the codec of the CID decides. When neither yields one of the
// supported JSON/CBOR codecs, it returns an empty content type and codec 0.
func getResponseContentTypeAndCodec(requestedContentType string, codec mc.Code) (string, mc.Code) {
	// Start from the CID codec and let a recognised request override it.
	target := codec
	switch requestedContentType {
	case "application/json":
		target = mc.Json
	case "application/cbor":
		target = mc.Cbor
	case "application/vnd.ipld.dag-json":
		target = mc.DagJson
	case "application/vnd.ipld.dag-cbor":
		target = mc.DagCbor
	}

	// Translate the selected codec into its HTTP content type.
	switch target {
	case mc.Json:
		return "application/json", target
	case mc.Cbor:
		return "application/cbor", target
	case mc.DagJson:
		return "application/vnd.ipld.dag-json", target
	case mc.DagCbor:
		return "application/vnd.ipld.dag-cbor", target
	default:
		// Neither the request nor the CID maps to a supported codec.
		return "", 0
	}
}

func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string, logger *zap.SugaredLogger) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
defer span.End()

cidCodec := resolvedPath.Cid().Prefix().Codec
responseContentType := requestedContentType

// If the resolved path still has some remainder, return error for now.
// TODO: handle this when we have IPLD Patch (https://ipld.io/specs/patch/) via HTTP PUT
// TODO: (depends on https://github.com/ipfs/kubo/issues/4801 and https://github.com/ipfs/kubo/issues/4782)
Expand All @@ -69,19 +86,17 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
return
}

// If no explicit content type was requested, the response will have one based on the codec from the CID
if requestedContentType == "" {
cidContentType, ok := codecToContentType[cidCodec]
if !ok {
// Should not happen unless function is called with wrong parameters.
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}
responseContentType = cidContentType
cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec)
responseContentType, responseCodec := getResponseContentTypeAndCodec(requestedContentType, cidCodec)

// This should never happen unless function is called with wrong parameters.
if responseContentType == "" {
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}

// Set HTTP headers (for caching etc)
// Set HTTP headers (for caching, etc).
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
name := setCodecContentDisposition(w, r, resolvedPath, responseContentType)
w.Header().Set("Content-Type", responseContentType)
Expand All @@ -90,51 +105,41 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
// No content type is specified by the user (via Accept, or format=). However,
// we support this format. Let's handle it.
if requestedContentType == "" {
isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor)
isDAG := responseCodec == mc.DagJson || responseCodec == mc.DagCbor
acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")
download := r.URL.Query().Get("download") == "true"

if isDAG && acceptsHTML && !download {
i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath)
} else {
// Here we cannot use serveRawBlock because we want to use the right
// content type as we know the content type we are serving.
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
}

return
}

// Otherwise, the user has requested a specific content type. Let's first get
// the codecs that can be used with this content type.
codecs, ok := contentTypeToCodecs[requestedContentType]
if !ok {
// This is never supposed to happen unless function is called with wrong parameters.
err := fmt.Errorf("unsupported content type: %s", requestedContentType)
webError(w, err.Error(), err, http.StatusInternalServerError)
// This should never happen unless the function is called with wrong parameters.
if _, ok := convertibleCodecs[cidCodec]; !ok {
err := fmt.Errorf("codec cannot be handled: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}

// If we need to convert, use the last codec (strict dag- variant)
toCodec := codecs[len(codecs)-1]

// If the requested content type has "dag-", ALWAYS go through the encoding
// process in order to validate the content.
if strings.Contains(requestedContentType, "dag-") {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
return
}

// Otherwise, check if the data is encoded with the requested content type.
// If so, we can directly stream the raw data. serveRawBlock cannot be directly
// used here as it sets different headers.
for _, codec := range codecs {
if resolvedPath.Cid().Prefix().Codec == codec {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
// If the user has requested a CID in some content type that can be converted
// to the target content type, we serve it converted with the correct headers.
for _, targetCodec := range convertibleCodecs[cidCodec] {
if targetCodec == responseCodec {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, responseCodec, modtime)
return
}
}

// Finally, if nothing of the above is true, we have to actually convert the codec.
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
// If the user has requested a conversion that is not possible (such as
// requesting a UnixFS file as JSON), we defer to the regular serveUnixFS
// function, which will serve the data behind it accordingly.
i.serveUnixFS(ctx, w, r, resolvedPath, contentPath, begin, logger)
}

func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) {
Expand Down Expand Up @@ -184,7 +189,7 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite
_, _, _ = ServeContent(w, r, name, modtime, content)
}

func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) {
func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec mc.Code, modtime time.Time) {
obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
if err != nil {
webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
Expand All @@ -199,7 +204,7 @@ func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.Respons
}
finalNode := universal.(ipld.Node)

encoder, err := multicodec.LookupEncoder(toCodec)
encoder, err := multicodec.LookupEncoder(uint64(toCodec))
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
Expand Down
55 changes: 17 additions & 38 deletions test/sharness/t0123-gateway-json-cbor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,22 @@ test_dag_pb_headers () {
test_should_not_contain "Content-Type: application/$format" curl_output
'

test_expect_success "GET UnixFS as $name with format=$format has expected Content-Type" '
test_expect_success "GET UnixFS as $name with 'Accept: foo, application/vnd.ipld.dag-$format,bar' has expected Content-Type" '
curl -sD - -H "Accept: foo, application/vnd.ipld.dag-$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
'

test_expect_success "GET UnixFS with format=$format returns raw (no conversion)" '
curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" > curl_output 2>&1 &&
test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output &&
test_should_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
'

test_expect_success "GET UnixFS as $name with 'Accept: application/$format' has expected Content-Type" '
test_expect_success "GET UnixFS with 'Accept: application/$format' returns raw (no conversion)" '
curl -sD - -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output &&
test_should_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
'

test_expect_success "GET UnixFS as $name with 'Accept: foo, application/$format,bar' has expected Content-Type" '
curl -sD - -H "Accept: foo, application/$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/$format" curl_output
'
}

test_dag_pb_headers "DAG-JSON" "json" "inline"
Expand All @@ -81,12 +79,6 @@ test_dag_pb () {
ipfs dag get --output-codec dag-$format $DIR_CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

test_expect_success "GET UnixFS as $name with format=dag-$format and format=$format produce same output" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output_1 2>&1 &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=$format" > curl_output_2 2>&1 &&
test_cmp curl_output_1 curl_output_2
'
}

test_dag_pb "DAG-JSON" "json"
Expand Down Expand Up @@ -148,17 +140,17 @@ test_cmp_dag_get "CBOR" "cbor" "attachment"

## Lossless conversion between JSON and CBOR

test_expect_success "GET JSON as CBOR produces DAG-CBOR output" '
test_expect_success "GET JSON as CBOR produces CBOR output" '
CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec json) &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=cbor" > curl_output 2>&1 &&
ipfs dag get --output-codec dag-cbor $CID > ipfs_dag_get_output 2>&1 &&
ipfs dag get --output-codec cbor $CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

test_expect_success "GET CBOR as JSON produces DAG-JSON output" '
test_expect_success "GET CBOR as JSON produces JSON output" '
CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec cbor) &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" > curl_output 2>&1 &&
ipfs dag get --output-codec dag-json $CID > ipfs_dag_get_output 2>&1 &&
ipfs dag get --output-codec json $CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

Expand Down Expand Up @@ -204,12 +196,6 @@ test_expect_success "GET DAG-CBOR traverses multiple links" '
test_cmp expected actual
'

# test_expect_success "GET DAG-PB has expected output" '
# curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_PB_CID?format=dag-json" > curl_output 2>&1 &&
# jq --sort-keys . curl_output > actual &&
# test_cmp ../t0123-gateway-json-cbor/dag-pb.json actual
# '


## NATIVE TESTS:
## DAG- regression tests for core behaviors when native DAG-(CBOR|JSON) is requested
Expand Down Expand Up @@ -302,18 +288,11 @@ test_native_dag () {
test_should_contain "Content-Type: application/vnd.ipld.dag-$format" output &&
test_should_contain "Content-Length: " output
'
test_expect_success "HEAD $name with an explicit JSON format returns HTTP 200" '
curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" -o output &&
test_should_contain "HTTP/1.1 200 OK" output &&
test_should_contain "Etag: \"$CID.json\"" output &&
test_should_contain "Content-Type: application/json" output &&
test_should_contain "Content-Length: " output
'
test_expect_success "HEAD dag-pb with ?format=$format returns HTTP 200" '
curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" -o output &&
test_expect_success "HEAD $name with an explicit DAG-JSON format returns HTTP 200" '
curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-json" -o output &&
test_should_contain "HTTP/1.1 200 OK" output &&
test_should_contain "Etag: \"$FILE_CID.$format\"" output &&
test_should_contain "Content-Type: application/$format" output &&
test_should_contain "Etag: \"$CID.dag-json\"" output &&
test_should_contain "Content-Type: application/vnd.ipld.dag-json" output &&
test_should_contain "Content-Length: " output
'
test_expect_success "HEAD $name with only-if-cached for missing block returns HTTP 412 Precondition Failed" '
Expand Down