From 772fab3a74c6f6d6194b5958912bd6160a83000f Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 9 Nov 2022 19:20:33 +0100 Subject: [PATCH] feat(gateway): TAR response format (#9029) Implementation of IPIP-288 (https://github.com/ipfs/specs/pull/288) Co-authored-by: Marcin Rataj This commit was moved from ipfs/kubo@a210abd74364076404c18df1acbeed8bd6a5d6b7 --- gateway/core/corehttp/gateway_handler.go | 18 ++-- gateway/core/corehttp/gateway_handler_tar.go | 92 ++++++++++++++++++++ 2 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 gateway/core/corehttp/gateway_handler_tar.go diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index a96799f58..7f0f11885 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -430,6 +430,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request carVersion := formatParams["version"] i.serveCAR(r.Context(), w, r, resolvedPath, contentPath, carVersion, begin) return + case "application/x-tar": + logger.Debugw("serving tar file", "path", contentPath) + i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger) + return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) webError(w, "failed respond with requested content type", err, http.StatusBadRequest) @@ -842,9 +846,10 @@ func getEtag(r *http.Request, cid cid.Cid) string { responseFormat, _, err := customResponseFormat(r) if err == nil && responseFormat != "" { // application/vnd.ipld.foo → foo - f := responseFormat[strings.LastIndex(responseFormat, ".")+1:] - // Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses) - suffix = `.` + f + suffix + // application/x-bar → x-bar + shortFormat := responseFormat[strings.LastIndexAny(responseFormat, "/.")+1:] + // Etag: "cid.shortFmt" (gives us nice compression together 
with Content-Disposition in block (raw) and car responses) + suffix = `.` + shortFormat + suffix } // TODO: include selector suffix when https://github.com/ipfs/kubo/issues/8769 lands return prefix + cid.String() + suffix @@ -859,14 +864,17 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "application/vnd.ipld.raw", nil, nil case "car": return "application/vnd.ipld.car", nil, nil + case "tar": + return "application/x-tar", nil, nil } } // Browsers and other user agents will send Accept header with generic types like: // Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8 - // We only care about explciit, vendor-specific content-types. + // We only care about explicit, vendor-specific content-types. for _, accept := range r.Header.Values("Accept") { // respond to the very first ipld content type - if strings.HasPrefix(accept, "application/vnd.ipld") { + if strings.HasPrefix(accept, "application/vnd.ipld") || + strings.HasPrefix(accept, "application/x-tar") { mediatype, params, err := mime.ParseMediaType(accept) if err != nil { return "", nil, err diff --git a/gateway/core/corehttp/gateway_handler_tar.go b/gateway/core/corehttp/gateway_handler_tar.go new file mode 100644 index 000000000..532d88757 --- /dev/null +++ b/gateway/core/corehttp/gateway_handler_tar.go @@ -0,0 +1,92 @@ +package corehttp + +import ( + "context" + "html" + "net/http" + "time" + + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "github.com/ipfs/kubo/tracing" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" +) + +var unixEpochTime = time.Unix(0, 0) + +func (i *gatewayHandler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { + ctx, span := tracing.Span(ctx, "Gateway", "ServeTAR", 
trace.WithAttributes(attribute.String("path", resolvedPath.String()))) + defer span.End() + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // Get Unixfs file + file, err := i.api.Unixfs().Get(ctx, resolvedPath) + if err != nil { + webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusBadRequest) + return + } + defer file.Close() + + rootCid := resolvedPath.Cid() + + // Set Cache-Control and read optional Last-Modified time + modtime := addCacheControlHeaders(w, r, contentPath, rootCid) + + // Weak Etag W/ because we can't guarantee byte-for-byte identical + // responses, but still want to benefit from HTTP Caching. Two TAR + // responses for the same CID will be logically equivalent, + // but when TAR is streamed, then in theory, files and directories + // may arrive in different order (depends on TAR lib and filesystem/inodes). + etag := `W/` + getEtag(r, rootCid) + w.Header().Set("Etag", etag) + + // Finish early if Etag match + if r.Header.Get("If-None-Match") == etag { + w.WriteHeader(http.StatusNotModified) + return + } + + // Set Content-Disposition + var name string + if urlFilename := r.URL.Query().Get("filename"); urlFilename != "" { + name = urlFilename + } else { + name = rootCid.String() + ".tar" + } + setContentDispositionHeader(w, name, "attachment") + + // Construct the TAR writer + tarw, err := files.NewTarWriter(w) + if err != nil { + webError(w, "could not build tar writer", err, http.StatusInternalServerError) + return + } + defer tarw.Close() + + // Sets correct Last-Modified header. This code is borrowed from the standard + // library (net/http/server.go) as we cannot use serveFile without throwing the entire + // TAR into the memory first. 
+ if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) { + w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat)) + } + + w.Header().Set("Content-Type", "application/x-tar") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // The TAR has a top-level directory (or file) named by the CID. + if err := tarw.WriteFile(file, rootCid.String()); err != nil { + w.Header().Set("X-Stream-Error", err.Error()) + // Trailer headers do not work in web browsers + // (see https://github.com/mdn/browser-compat-data/issues/14703) + // and we have limited options around error handling in browser contexts. + // To improve UX/DX, we finish response stream with error message, allowing client to + // (1) detect error by having corrupted TAR + // (2) be able to reason what went wrong by inspecting the tail of TAR stream + _, _ = w.Write([]byte(err.Error())) + return + } +}