diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index c20f112d76a..d2ffdebc97e 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -422,7 +422,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request switch resolvedPath.Cid().Prefix().Codec { case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor): logger.Debugw("serving codec", "path", contentPath) - i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat) + i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat, logger) default: logger.Debugw("serving unixfs", "path", contentPath) i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger) @@ -444,7 +444,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request case "application/json", "application/vnd.ipld.dag-json", "application/cbor", "application/vnd.ipld.dag-cbor": logger.Debugw("serving codec", "path", contentPath) - i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat) + i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat, logger) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index 95a151c7943..66adf315b8a 100644 --- a/core/corehttp/gateway_handler_codec.go +++ b/core/corehttp/gateway_handler_codec.go @@ -21,26 +21,17 @@ import ( mc "github.com/multiformats/go-multicodec" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" ) -// codecToContentType maps the supported IPLD codecs to the HTTP Content -// Type they should have. -var codecToContentType = map[uint64]string{ - uint64(mc.Json): "application/json", - uint64(mc.Cbor): "application/cbor", - uint64(mc.DagJson): "application/vnd.ipld.dag-json", - uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor", -} - -// contentTypeToCodecs maps the HTTP Content Type to the respective -// possible codecs. If the original data is in one of those codecs, -// we stream the raw bytes. Otherwise, we encode in the last codec -// of the list. -var contentTypeToCodecs = map[string][]uint64{ - "application/json": {uint64(mc.Json), uint64(mc.DagJson)}, - "application/vnd.ipld.dag-json": {uint64(mc.DagJson)}, - "application/cbor": {uint64(mc.Cbor), uint64(mc.DagCbor)}, - "application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)}, +// convertibleCodecs maps supported input codecs into supported output codecs. +var convertibleCodecs = map[mc.Code][]mc.Code{ + mc.Raw: {mc.DagCbor, mc.DagJson}, + mc.DagPb: {mc.DagCbor, mc.DagJson}, + mc.DagJson: {mc.DagCbor, mc.DagJson}, + mc.DagCbor: {mc.DagCbor, mc.DagJson}, + mc.Json: {mc.Cbor, mc.Json, mc.DagCbor, mc.DagJson}, + mc.Cbor: {mc.Cbor, mc.Json, mc.DagCbor, mc.DagJson}, } // contentTypeToExtension maps the HTTP Content Type to the respective file @@ -52,13 +43,39 @@ var contentTypeToExtension = map[string]string{ "application/vnd.ipld.dag-cbor": ".cbor", } -func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) { +// getResponseContentTypeAndCodec returns the response content type and codec based +// on the requested content type and CID codec. The requested content type has +// priority over the CID codec. +func getResponseContentTypeAndCodec(requestedContentType string, codec mc.Code) (string, mc.Code) { + switch requestedContentType { + case "application/json": + return "application/json", mc.Json + case "application/cbor": + return "application/cbor", mc.Cbor + case "application/vnd.ipld.dag-json": + return "application/vnd.ipld.dag-json", mc.DagJson + case "application/vnd.ipld.dag-cbor": + return "application/vnd.ipld.dag-cbor", mc.DagCbor + } + + switch codec { + case mc.Json: + return "application/json", mc.Json + case mc.Cbor: + return "application/cbor", mc.Cbor + case mc.DagJson: + return "application/vnd.ipld.dag-json", mc.DagJson + case mc.DagCbor: + return "application/vnd.ipld.dag-cbor", mc.DagCbor + } + + return "", 0 +} + +func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string, logger *zap.SugaredLogger) { ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType))) defer span.End() - cidCodec := resolvedPath.Cid().Prefix().Codec - responseContentType := requestedContentType - // If the resolved path still has some remainder, return error for now. // TODO: handle this when we have IPLD Patch (https://ipld.io/specs/patch/) via HTTP PUT // TODO: (depends on https://github.com/ipfs/kubo/issues/4801 and https://github.com/ipfs/kubo/issues/4782) @@ -69,19 +86,17 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, return } - // If no explicit content type was requested, the response will have one based on the codec from the CID - if requestedContentType == "" { - cidContentType, ok := codecToContentType[cidCodec] - if !ok { - // Should not happen unless function is called with wrong parameters. - err := fmt.Errorf("content type not found for codec: %v", cidCodec) - webError(w, "internal error", err, http.StatusInternalServerError) - return - } - responseContentType = cidContentType + cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec) + responseContentType, responseCodec := getResponseContentTypeAndCodec(requestedContentType, cidCodec) + + // This should never happen unless function is called with wrong parameters. + if responseContentType == "" { + err := fmt.Errorf("content type not found for codec: %v", cidCodec) + webError(w, "internal error", err, http.StatusInternalServerError) + return } - // Set HTTP headers (for caching etc) + // Set HTTP headers (for caching, etc). modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid()) name := setCodecContentDisposition(w, r, resolvedPath, responseContentType) w.Header().Set("Content-Type", responseContentType) @@ -90,51 +105,41 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, // No content type is specified by the user (via Accept, or format=). However, // we support this format. Let's handle it. if requestedContentType == "" { - isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor) + isDAG := responseCodec == mc.DagJson || responseCodec == mc.DagCbor acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html") download := r.URL.Query().Get("download") == "true" if isDAG && acceptsHTML && !download { i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath) } else { + // Here we cannot use serveRawBlock because we want to use the right + // content type as we know the content type we are serving. i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) } return } - // Otherwise, the user has requested a specific content type. Let's first get - // the codecs that can be used with this content type. - codecs, ok := contentTypeToCodecs[requestedContentType] - if !ok { - // This is never supposed to happen unless function is called with wrong parameters. - err := fmt.Errorf("unsupported content type: %s", requestedContentType) - webError(w, err.Error(), err, http.StatusInternalServerError) + // This should never happen unless the function is called with wrong parameters. + if _, ok := convertibleCodecs[cidCodec]; !ok { + err := fmt.Errorf("codec cannot be handled: %v", cidCodec) + webError(w, "internal error", err, http.StatusInternalServerError) return } - // If we need to convert, use the last codec (strict dag- variant) - toCodec := codecs[len(codecs)-1] - - // If the requested content type has "dag-", ALWAYS go through the encoding - // process in order to validate the content. - if strings.Contains(requestedContentType, "dag-") { - i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime) - return - } - - // Otherwise, check if the data is encoded with the requested content type. - // If so, we can directly stream the raw data. serveRawBlock cannot be directly - // used here as it sets different headers. - for _, codec := range codecs { - if resolvedPath.Cid().Prefix().Codec == codec { - i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) + // If the user has requested a CID in some content type that can be converted + // to the target content type, we serve it converted with the correct headers. + for _, targetCodec := range convertibleCodecs[cidCodec] { + if targetCodec == responseCodec { + i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, responseCodec, modtime) return } } - // Finally, if nothing of the above is true, we have to actually convert the codec. - i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime) + // If the user has requested for a conversion that is not possible (such as + // requesting a UnixFS file as a JSON), we defer to the regular serve UnixFS + // function that will serve the data behind it accordingly. + i.serveUnixFS(ctx, w, r, resolvedPath, contentPath, begin, logger) } func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) { @@ -184,7 +189,7 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite _, _, _ = ServeContent(w, r, name, modtime, content) } -func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) { +func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec mc.Code, modtime time.Time) { obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid()) if err != nil { webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError) @@ -199,7 +204,7 @@ func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.Respons } finalNode := universal.(ipld.Node) - encoder, err := multicodec.LookupEncoder(toCodec) + encoder, err := multicodec.LookupEncoder(uint64(toCodec)) if err != nil { webError(w, err.Error(), err, http.StatusInternalServerError) return diff --git a/test/sharness/t0123-gateway-json-cbor.sh b/test/sharness/t0123-gateway-json-cbor.sh index f4ebca19d2c..d41e730ed42 100755 --- a/test/sharness/t0123-gateway-json-cbor.sh +++ b/test/sharness/t0123-gateway-json-cbor.sh @@ -43,24 +43,22 @@ test_dag_pb_headers () { test_should_not_contain "Content-Type: application/$format" curl_output ' - test_expect_success "GET UnixFS as $name with format=$format has expected Content-Type" ' + test_expect_success "GET UnixFS as $name with 'Accept: foo, application/vnd.ipld.dag-$format,bar' has expected Content-Type" ' + curl -sD - -H "Accept: foo, application/vnd.ipld.dag-$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && + test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output + ' + + test_expect_success "GET UnixFS with format=$format returns raw (no conversion)" ' curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" > curl_output 2>&1 && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output && - test_should_contain "Content-Type: application/$format" curl_output && + test_should_not_contain "Content-Type: application/$format" curl_output && test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output ' - test_expect_success "GET UnixFS as $name with 'Accept: application/$format' has expected Content-Type" ' + test_expect_success "GET UnixFS with 'Accept: application/$format' returns raw (no conversion)" ' curl -sD - -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output && - test_should_contain "Content-Type: application/$format" curl_output && + test_should_not_contain "Content-Type: application/$format" curl_output && test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output ' - - test_expect_success "GET UnixFS as $name with 'Accept: foo, application/$format,bar' has expected Content-Type" ' - curl -sD - -H "Accept: foo, application/$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && - test_should_contain "Content-Type: application/$format" curl_output - ' } test_dag_pb_headers "DAG-JSON" "json" "inline" @@ -81,12 +79,6 @@ test_dag_pb () { ipfs dag get --output-codec dag-$format $DIR_CID > ipfs_dag_get_output 2>&1 && test_cmp ipfs_dag_get_output curl_output ' - - test_expect_success "GET UnixFS as $name with format=dag-$format and format=$format produce same output" ' - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output_1 2>&1 && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=$format" > curl_output_2 2>&1 && - test_cmp curl_output_1 curl_output_2 - ' } test_dag_pb "DAG-JSON" "json" @@ -148,17 +140,17 @@ test_cmp_dag_get "CBOR" "cbor" "attachment" ## Lossless conversion between JSON and CBOR -test_expect_success "GET JSON as CBOR produces DAG-CBOR output" ' +test_expect_success "GET JSON as CBOR produces CBOR output" ' CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec json) && curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=cbor" > curl_output 2>&1 && - ipfs dag get --output-codec dag-cbor $CID > ipfs_dag_get_output 2>&1 && + ipfs dag get --output-codec cbor $CID > ipfs_dag_get_output 2>&1 && test_cmp ipfs_dag_get_output curl_output ' -test_expect_success "GET CBOR as JSON produces DAG-JSON output" ' +test_expect_success "GET CBOR as JSON produces JSON output" ' CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec cbor) && curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" > curl_output 2>&1 && - ipfs dag get --output-codec dag-json $CID > ipfs_dag_get_output 2>&1 && + ipfs dag get --output-codec json $CID > ipfs_dag_get_output 2>&1 && test_cmp ipfs_dag_get_output curl_output ' @@ -204,12 +196,6 @@ test_expect_success "GET DAG-CBOR traverses multiple links" ' test_cmp expected actual ' -# test_expect_success "GET DAG-PB has expected output" ' -# curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_PB_CID?format=dag-json" > curl_output 2>&1 && -# jq --sort-keys . curl_output > actual && -# test_cmp ../t0123-gateway-json-cbor/dag-pb.json actual -# ' - ## NATIVE TESTS: ## DAG- regression tests for core behaviors when native DAG-(CBOR|JSON) is requested @@ -302,18 +288,11 @@ test_native_dag () { test_should_contain "Content-Type: application/vnd.ipld.dag-$format" output && test_should_contain "Content-Length: " output ' - test_expect_success "HEAD $name with an explicit JSON format returns HTTP 200" ' - curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" -o output && - test_should_contain "HTTP/1.1 200 OK" output && - test_should_contain "Etag: \"$CID.json\"" output && - test_should_contain "Content-Type: application/json" output && - test_should_contain "Content-Length: " output - ' - test_expect_success "HEAD dag-pb with ?format=$format returns HTTP 200" ' - curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" -o output && + test_expect_success "HEAD $name with an explicit DAG-JSON format returns HTTP 200" ' + curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-json" -o output && test_should_contain "HTTP/1.1 200 OK" output && - test_should_contain "Etag: \"$FILE_CID.$format\"" output && - test_should_contain "Content-Type: application/$format" output && + test_should_contain "Etag: \"$CID.dag-json\"" output && + test_should_contain "Content-Type: application/vnd.ipld.dag-json" output && test_should_contain "Content-Length: " output ' test_expect_success "HEAD $name with only-if-cached for missing block returns HTTP 412 Precondition Failed" '