Skip to content

Commit 67fdb6e

Browse files
authored
feat(gw): improved If-None-Match support (#8891)
Improves the way we handle If-None-Match header: - Support for more than one Etag passed in If-None-Match - Match both strong and weak Etags to maximize caching across various HTTP clients and libraries (some send weak Etags by default) - Support for wildcard '*' - Tests for If-None-Match behavior
1 parent d59730f commit 67fdb6e

File tree

4 files changed

+155
-14
lines changed

4 files changed

+155
-14
lines changed

core/corehttp/gateway_handler.go

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"io"
88
"mime"
99
"net/http"
10+
"net/textproto"
1011
"net/url"
1112
"os"
1213
gopath "path"
@@ -390,11 +391,18 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
390391
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat))
391392
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String()))
392393

393-
// Finish early if client already has matching Etag
394-
ifNoneMatch := r.Header.Get("If-None-Match")
395-
if ifNoneMatch == getEtag(r, resolvedPath.Cid()) || ifNoneMatch == getDirListingEtag(resolvedPath.Cid()) {
396-
w.WriteHeader(http.StatusNotModified)
397-
return
394+
// Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified
395+
if inm := r.Header.Get("If-None-Match"); inm != "" {
396+
pathCid := resolvedPath.Cid()
397+
// need to check against both File and Dir Etag variants
398+
// because this inexpensive check happens before we do any I/O
399+
cidEtag := getEtag(r, pathCid)
400+
dirEtag := getDirListingEtag(pathCid)
401+
if etagMatch(inm, cidEtag, dirEtag) {
402+
// Finish early if client already has a matching Etag
403+
w.WriteHeader(http.StatusNotModified)
404+
return
405+
}
398406
}
399407

400408
if err := i.handleGettingFirstBlock(r, begin, contentPath, resolvedPath); err != nil {
@@ -790,6 +798,71 @@ func getFilename(contentPath ipath.Path) string {
790798
return gopath.Base(s)
791799
}
792800

801+
// etagMatch evaluates if we can respond with HTTP 304 Not Modified
802+
// It supports multiple weak and strong etags passed in If-None-Matc stringh
803+
// including the wildcard one.
804+
func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool {
805+
buf := ifNoneMatchHeader
806+
for {
807+
buf = textproto.TrimString(buf)
808+
if len(buf) == 0 {
809+
break
810+
}
811+
if buf[0] == ',' {
812+
buf = buf[1:]
813+
continue
814+
}
815+
// If-None-Match: * should match against any etag
816+
if buf[0] == '*' {
817+
return true
818+
}
819+
etag, remain := scanETag(buf)
820+
if etag == "" {
821+
break
822+
}
823+
// Check for match both strong and weak etags
824+
if etagWeakMatch(etag, cidEtag) || etagWeakMatch(etag, dirEtag) {
825+
return true
826+
}
827+
buf = remain
828+
}
829+
return false
830+
}
831+
832+
// scanETag determines if a syntactically valid ETag is present at s. If so,
833+
// the ETag and remaining text after consuming ETag is returned. Otherwise,
834+
// it returns "", "".
835+
// (This is the same logic as one executed inside of http.ServeContent)
836+
func scanETag(s string) (etag string, remain string) {
837+
s = textproto.TrimString(s)
838+
start := 0
839+
if strings.HasPrefix(s, "W/") {
840+
start = 2
841+
}
842+
if len(s[start:]) < 2 || s[start] != '"' {
843+
return "", ""
844+
}
845+
// ETag is either W/"text" or "text".
846+
// See RFC 7232 2.3.
847+
for i := start + 1; i < len(s); i++ {
848+
c := s[i]
849+
switch {
850+
// Character values allowed in ETags.
851+
case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80:
852+
case c == '"':
853+
return s[:i+1], s[i+1:]
854+
default:
855+
return "", ""
856+
}
857+
}
858+
return "", ""
859+
}
860+
861+
// etagWeakMatch reports whether a and b match using weak ETag comparison.
862+
func etagWeakMatch(a, b string) bool {
863+
return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/")
864+
}
865+
793866
// generate Etag value based on HTTP request and CID
794867
func getEtag(r *http.Request, cid cid.Cid) string {
795868
prefix := `"`

core/corehttp/gateway_handler_unixfs_dir.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,9 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
9393
// type instead of relying on autodetection (which may fail).
9494
w.Header().Set("Content-Type", "text/html")
9595

96-
// Generated dir index requires custom Etag (it may change between go-ipfs versions)
97-
if assets.AssetHash != "" {
98-
dirEtag := getDirListingEtag(resolvedPath.Cid())
99-
w.Header().Set("Etag", dirEtag)
100-
if r.Header.Get("If-None-Match") == dirEtag {
101-
w.WriteHeader(http.StatusNotModified)
102-
return
103-
}
104-
}
96+
// Generated dir index requires custom Etag (output may change between go-ipfs versions)
97+
dirEtag := getDirListingEtag(resolvedPath.Cid())
98+
w.Header().Set("Etag", dirEtag)
10599

106100
if r.Method == http.MethodHead {
107101
logger.Debug("return as request's HTTP method is HEAD")

core/corehttp/gateway_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,3 +656,28 @@ func TestVersion(t *testing.T) {
656656
t.Fatalf("response doesn't contain protocol version:\n%s", s)
657657
}
658658
}
659+
660+
func TestEtagMatch(t *testing.T) {
661+
for _, test := range []struct {
662+
header string // value in If-None-Match HTTP header
663+
cidEtag string
664+
dirEtag string
665+
expected bool // expected result of etagMatch(header, cidEtag, dirEtag)
666+
}{
667+
{"", `"etag"`, "", false}, // no If-None-Match
668+
{"", "", `"etag"`, false}, // no If-None-Match
669+
{`"etag"`, `"etag"`, "", true}, // file etag match
670+
{`W/"etag"`, `"etag"`, "", true}, // file etag match
671+
{`"foo", W/"bar", W/"etag"`, `"etag"`, "", true}, // file etag match (array)
672+
{`"foo",W/"bar",W/"etag"`, `"etag"`, "", true}, // file etag match (compact array)
673+
{`"etag"`, "", `W/"etag"`, true}, // dir etag match
674+
{`"etag"`, "", `W/"etag"`, true}, // dir etag match
675+
{`W/"etag"`, "", `W/"etag"`, true}, // dir etag match
676+
{`*`, `"etag"`, "", true}, // wildcard etag match
677+
} {
678+
result := etagMatch(test.header, test.cidEtag, test.dirEtag)
679+
if result != test.expected {
680+
t.Fatalf("unexpected result of etagMatch(%q, %q, %q), got %t, expected %t", test.header, test.cidEtag, test.dirEtag, result, test.expected)
681+
}
682+
}
683+
}

test/sharness/t0116-gateway-cache.sh

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,55 @@ test_expect_success "Prepare IPNS unixfs content path for testing" '
145145
grep "< Etag: \"${FILE_CID}\"" curl_ipns_file_output
146146
'
147147

148+
# If-None-Match (return 304 Not Modified when client sends matching Etag they already have)
149+
150+
test_expect_success "GET for /ipfs/ file with matching Etag in If-None-Match returns 304 Not Modified" '
151+
curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
152+
test_should_contain "304 Not Modified" curl_output
153+
'
154+
155+
test_expect_success "GET for /ipfs/ dir with index.html file with matching Etag in If-None-Match returns 304 Not Modified" '
156+
curl -svX GET -H "If-None-Match: \"$ROOT4_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/" >/dev/null 2>curl_output &&
157+
test_should_contain "304 Not Modified" curl_output
158+
'
159+
160+
test_expect_success "GET for /ipfs/ file with matching third Etag in If-None-Match returns 304 Not Modified" '
161+
curl -svX GET -H "If-None-Match: \"fakeEtag1\", \"fakeEtag2\", \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
162+
test_should_contain "304 Not Modified" curl_output
163+
'
164+
165+
test_expect_success "GET for /ipfs/ file with matching weak Etag in If-None-Match returns 304 Not Modified" '
166+
curl -svX GET -H "If-None-Match: W/\"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
167+
test_should_contain "304 Not Modified" curl_output
168+
'
169+
170+
test_expect_success "GET for /ipfs/ file with wildcard Etag in If-None-Match returns 304 Not Modified" '
171+
curl -svX GET -H "If-None-Match: *" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
172+
test_should_contain "304 Not Modified" curl_output
173+
'
174+
175+
test_expect_success "GET for /ipns/ file with matching Etag in If-None-Match returns 304 Not Modified" '
176+
curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
177+
test_should_contain "304 Not Modified" curl_output
178+
'
179+
180+
test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" '
181+
curl -svX GET -H "If-None-Match: W/\"$ROOT3_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
182+
test_should_contain "304 Not Modified" curl_output
183+
'
184+
185+
# DirIndex etag is based on xxhash(./assets/dir-index-html), so we need to fetch it dynamically
186+
test_expect_success "GET for /ipfs/ dir listing with matching strong Etag in If-None-Match returns 304 Not Modified" '
187+
curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag &&
188+
curl -svX GET -H "If-None-Match: \"$(<dir_index_etag)\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
189+
test_should_contain "304 Not Modified" curl_output
190+
'
191+
test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" '
192+
curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag &&
193+
curl -svX GET -H "If-None-Match: W/\"$(<dir_index_etag)\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
194+
test_should_contain "304 Not Modified" curl_output
195+
'
196+
148197
test_kill_ipfs_daemon
149198

150199
test_done

0 commit comments

Comments
 (0)