Skip to content

Commit

Permalink
fix(gateway): do not convert unixfs/raw into dag-* unless explicit
Browse files Browse the repository at this point in the history
  • Loading branch information
hacdias committed Jan 19, 2023
1 parent 73ebad1 commit e2cf012
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 98 deletions.
124 changes: 64 additions & 60 deletions core/corehttp/gateway_handler_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,14 @@ import (
"go.opentelemetry.io/otel/trace"
)

// codecToContentType maps the supported IPLD codecs to the HTTP Content
// Type they should have.
var codecToContentType = map[uint64]string{
uint64(mc.Json): "application/json",
uint64(mc.Cbor): "application/cbor",
uint64(mc.DagJson): "application/vnd.ipld.dag-json",
uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor",
}

// contentTypeToCodecs maps the HTTP Content Type to the respective
// possible codecs. If the original data is in one of those codecs,
// we stream the raw bytes. Otherwise, we encode in the last codec
// of the list.
var contentTypeToCodecs = map[string][]uint64{
"application/json": {uint64(mc.Json), uint64(mc.DagJson)},
"application/vnd.ipld.dag-json": {uint64(mc.DagJson)},
"application/cbor": {uint64(mc.Cbor), uint64(mc.DagCbor)},
"application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)},
// convertibleCodecs maps each supported input codec (the codec of the resolved
// CID) to the output codecs that serveCodec is willing to convert it into.
//
// serveCodec looks up the CID codec here and only performs a conversion when
// the response codec appears in the corresponding list; otherwise it falls
// back to serving the raw block. Note that raw and dag-pb list only the dag-*
// codecs, so they are never converted into plain JSON or CBOR.
var convertibleCodecs = map[mc.Code][]mc.Code{
mc.Raw: {mc.DagCbor, mc.DagJson},
mc.DagPb: {mc.DagCbor, mc.DagJson},
mc.DagJson: {mc.DagCbor, mc.DagJson},
mc.DagCbor: {mc.DagCbor, mc.DagJson},
mc.Json: {mc.Cbor, mc.Json, mc.DagCbor, mc.DagJson},
mc.Cbor: {mc.Cbor, mc.Json, mc.DagCbor, mc.DagJson},
}

// contentTypeToExtension maps the HTTP Content Type to the respective file
Expand All @@ -52,13 +42,39 @@ var contentTypeToExtension = map[string]string{
"application/vnd.ipld.dag-cbor": ".cbor",
}

// getResponseContentTypeAndCodec picks the content type and codec used for the
// response.
//
// An explicitly requested content type wins: when requestedContentType is one
// of the four supported values, the codec that matches it is selected and the
// CID codec is ignored. When it is empty or unrecognized, the CID codec decides.
// If the selected codec is not one of JSON, CBOR, DAG-JSON or DAG-CBOR, the
// function returns ("", 0).
func getResponseContentTypeAndCodec(requestedContentType string, codec mc.Code) (string, mc.Code) {
	const (
		jsonType    = "application/json"
		cborType    = "application/cbor"
		dagJSONType = "application/vnd.ipld.dag-json"
		dagCBORType = "application/vnd.ipld.dag-cbor"
	)

	// Start from the CID codec and let a recognized requested type override it.
	selected := codec
	switch requestedContentType {
	case jsonType:
		selected = mc.Json
	case cborType:
		selected = mc.Cbor
	case dagJSONType:
		selected = mc.DagJson
	case dagCBORType:
		selected = mc.DagCbor
	}

	// Translate the selected codec back into its content type.
	switch selected {
	case mc.Json:
		return jsonType, mc.Json
	case mc.Cbor:
		return cborType, mc.Cbor
	case mc.DagJson:
		return dagJSONType, mc.DagJson
	case mc.DagCbor:
		return dagCBORType, mc.DagCbor
	default:
		return "", 0
	}
}

func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType)))
defer span.End()

cidCodec := resolvedPath.Cid().Prefix().Codec
responseContentType := requestedContentType

// If the resolved path still has some remainder, return error for now.
// TODO: handle this when we have IPLD Patch (https://ipld.io/specs/patch/) via HTTP PUT
// TODO: (depends on https://github.com/ipfs/kubo/issues/4801 and https://github.com/ipfs/kubo/issues/4782)
Expand All @@ -69,19 +85,17 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
return
}

// If no explicit content type was requested, the response will have one based on the codec from the CID
if requestedContentType == "" {
cidContentType, ok := codecToContentType[cidCodec]
if !ok {
// Should not happen unless function is called with wrong parameters.
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}
responseContentType = cidContentType
cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec)
responseContentType, responseCodec := getResponseContentTypeAndCodec(requestedContentType, cidCodec)

// This should never happen unless function is called with wrong parameters.
if responseContentType == "" {
err := fmt.Errorf("content type not found for codec: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}

// Set HTTP headers (for caching etc)
// Set HTTP headers (for caching, etc).
modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid())
name := setCodecContentDisposition(w, r, resolvedPath, responseContentType)
w.Header().Set("Content-Type", responseContentType)
Expand All @@ -90,51 +104,41 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter,
// No content type is specified by the user (via Accept, or format=). However,
// we support this format. Let's handle it.
if requestedContentType == "" {
isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor)
isDAG := responseCodec == mc.DagJson || responseCodec == mc.DagCbor
acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html")
download := r.URL.Query().Get("download") == "true"

if isDAG && acceptsHTML && !download {
i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath)
} else {
// Here we cannot use serveRawBlock because we want to use the right
// content type as we know the content type we are serving.
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
}

return
}

// Otherwise, the user has requested a specific content type. Let's first get
// the codecs that can be used with this content type.
codecs, ok := contentTypeToCodecs[requestedContentType]
if !ok {
// This is never supposed to happen unless function is called with wrong parameters.
err := fmt.Errorf("unsupported content type: %s", requestedContentType)
webError(w, err.Error(), err, http.StatusInternalServerError)
// This should never happen unless the function is called with wrong parameters.
if _, ok := convertibleCodecs[cidCodec]; !ok {
err := fmt.Errorf("codec cannot be handled: %v", cidCodec)
webError(w, "internal error", err, http.StatusInternalServerError)
return
}

// If we need to convert, use the last codec (strict dag- variant)
toCodec := codecs[len(codecs)-1]

// If the requested content type has "dag-", ALWAYS go through the encoding
// process in order to validate the content.
if strings.Contains(requestedContentType, "dag-") {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
return
}

// Otherwise, check if the data is encoded with the requested content type.
// If so, we can directly stream the raw data. serveRawBlock cannot be directly
// used here as it sets different headers.
for _, codec := range codecs {
if resolvedPath.Cid().Prefix().Codec == codec {
i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime)
// If the user has requested a CID in some content type that can be converted
// to the target content type, we serve it converted with the correct headers.
for _, targetCodec := range convertibleCodecs[cidCodec] {
if targetCodec == responseCodec {
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, responseCodec, modtime)
return
}
}

// Finally, if nothing of the above is true, we have to actually convert the codec.
i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime)
// If the user has requested a conversion that is not possible (such as
// requesting a UnixFS file as JSON), we defer to the regular serveRawBlock
// function, which serves the underlying block instead.
i.serveRawBlock(ctx, w, r, resolvedPath, contentPath, begin)
}

func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) {
Expand Down Expand Up @@ -184,7 +188,7 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite
_, _, _ = ServeContent(w, r, name, modtime, content)
}

func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) {
func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec mc.Code, modtime time.Time) {
obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid())
if err != nil {
webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError)
Expand All @@ -199,7 +203,7 @@ func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.Respons
}
finalNode := universal.(ipld.Node)

encoder, err := multicodec.LookupEncoder(toCodec)
encoder, err := multicodec.LookupEncoder(uint64(toCodec))
if err != nil {
webError(w, err.Error(), err, http.StatusInternalServerError)
return
Expand Down
55 changes: 17 additions & 38 deletions test/sharness/t0123-gateway-json-cbor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,22 @@ test_dag_pb_headers () {
test_should_not_contain "Content-Type: application/$format" curl_output
'

test_expect_success "GET UnixFS as $name with format=$format has expected Content-Type" '
test_expect_success "GET UnixFS as $name with 'Accept: foo, application/vnd.ipld.dag-$format,bar' has expected Content-Type" '
curl -sD - -H "Accept: foo, application/vnd.ipld.dag-$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
'

test_expect_success "GET UnixFS with format=$format returns raw (no conversion)" '
curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" > curl_output 2>&1 &&
test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output &&
test_should_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
'

test_expect_success "GET UnixFS as $name with 'Accept: application/$format' has expected Content-Type" '
test_expect_success "GET UnixFS with 'Accept: application/$format' returns raw (no conversion)" '
curl -sD - -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output &&
test_should_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/$format" curl_output &&
test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output
'

test_expect_success "GET UnixFS as $name with 'Accept: foo, application/$format,bar' has expected Content-Type" '
curl -sD - -H "Accept: foo, application/$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 &&
test_should_contain "Content-Type: application/$format" curl_output
'
}

test_dag_pb_headers "DAG-JSON" "json" "inline"
Expand All @@ -81,12 +79,6 @@ test_dag_pb () {
ipfs dag get --output-codec dag-$format $DIR_CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

test_expect_success "GET UnixFS as $name with format=dag-$format and format=$format produce same output" '
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output_1 2>&1 &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=$format" > curl_output_2 2>&1 &&
test_cmp curl_output_1 curl_output_2
'
}

test_dag_pb "DAG-JSON" "json"
Expand Down Expand Up @@ -148,17 +140,17 @@ test_cmp_dag_get "CBOR" "cbor" "attachment"

## Lossless conversion between JSON and CBOR

test_expect_success "GET JSON as CBOR produces DAG-CBOR output" '
test_expect_success "GET JSON as CBOR produces CBOR output" '
CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec json) &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=cbor" > curl_output 2>&1 &&
ipfs dag get --output-codec dag-cbor $CID > ipfs_dag_get_output 2>&1 &&
ipfs dag get --output-codec cbor $CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

test_expect_success "GET CBOR as JSON produces DAG-JSON output" '
test_expect_success "GET CBOR as JSON produces JSON output" '
CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec cbor) &&
curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" > curl_output 2>&1 &&
ipfs dag get --output-codec dag-json $CID > ipfs_dag_get_output 2>&1 &&
ipfs dag get --output-codec json $CID > ipfs_dag_get_output 2>&1 &&
test_cmp ipfs_dag_get_output curl_output
'

Expand Down Expand Up @@ -204,12 +196,6 @@ test_expect_success "GET DAG-CBOR traverses multiple links" '
test_cmp expected actual
'

# test_expect_success "GET DAG-PB has expected output" '
# curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_PB_CID?format=dag-json" > curl_output 2>&1 &&
# jq --sort-keys . curl_output > actual &&
# test_cmp ../t0123-gateway-json-cbor/dag-pb.json actual
# '


## NATIVE TESTS:
## DAG- regression tests for core behaviors when native DAG-(CBOR|JSON) is requested
Expand Down Expand Up @@ -302,18 +288,11 @@ test_native_dag () {
test_should_contain "Content-Type: application/vnd.ipld.dag-$format" output &&
test_should_contain "Content-Length: " output
'
test_expect_success "HEAD $name with an explicit JSON format returns HTTP 200" '
curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" -o output &&
test_should_contain "HTTP/1.1 200 OK" output &&
test_should_contain "Etag: \"$CID.json\"" output &&
test_should_contain "Content-Type: application/json" output &&
test_should_contain "Content-Length: " output
'
test_expect_success "HEAD dag-pb with ?format=$format returns HTTP 200" '
curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" -o output &&
test_expect_success "HEAD $name with an explicit DAG-JSON format returns HTTP 200" '
curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-json" -o output &&
test_should_contain "HTTP/1.1 200 OK" output &&
test_should_contain "Etag: \"$FILE_CID.$format\"" output &&
test_should_contain "Content-Type: application/$format" output &&
test_should_contain "Etag: \"$CID.dag-json\"" output &&
test_should_contain "Content-Type: application/vnd.ipld.dag-json" output &&
test_should_contain "Content-Length: " output
'
test_expect_success "HEAD $name with only-if-cached for missing block returns HTTP 412 Precondition Failed" '
Expand Down

0 comments on commit e2cf012

Please sign in to comment.