Skip to content

Commit d4e2176

Browse files
authored
CBG-4767: Make _raw endpoint return actual state of persisted doc (#7649)
1 parent fa7acc6 commit d4e2176

File tree

14 files changed

+434
-371
lines changed

14 files changed

+434
-371
lines changed

base/constants.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,15 @@ const (
157157
FromConnStrWarningThreshold = 10 * time.Second
158158
)
159159

160+
// SyncGatewayRawDocXattrs is a list of xattrs that Sync Gateway will fetch when reading a raw document.
// This is a shared package-level slice: callers that need extra keys should copy it before appending
// (GetRawDoc in the db package clones it before adding the user xattr key) rather than appending in place.
161+
var SyncGatewayRawDocXattrs = []string{
162+
SyncXattrName,
163+
GlobalXattrName,
164+
VvXattrName,
165+
MouXattrName,
166+
VirtualDocumentXattr,
167+
}
168+
160169
const (
161170
DefaultScope = "_default"
162171
DefaultCollection = "_default"

base/error.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func (err *HTTPError) Error() string {
104104

105105
// HTTPErrorf creates an HTTPError with an http status code and a formatted message.
// The message is rendered with fmt.Errorf rather than fmt.Sprintf so that format strings containing the
// %w verb produce the wrapped error's text. Only the resulting string is kept: the wrapped error is not
// retained, so errors.Is/errors.As will not see it through the returned HTTPError.
106106
func HTTPErrorf(status int, format string, args ...interface{}) *HTTPError {
107-
return NewHTTPError(status, fmt.Sprintf(format, args...))
107+
return NewHTTPError(status, fmt.Errorf(format, args...).Error())
108108
}
109109

110110
// NewHTTPError creates an HTTPError with an http status code and a message. Use HTTPErrorf for printf style messages.

channels/main_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ func TestMain(m *testing.M) {
2424
teardownFuncs := make([]func(), 0)
2525
teardownFuncs = append(teardownFuncs, base.SetUpGlobalTestLogging(ctx))
2626
teardownFuncs = append(teardownFuncs, base.SetUpGlobalTestProfiling(m))
27-
teardownFuncs = append(teardownFuncs, base.SetUpGlobalTestMemoryWatermark(m, 256))
27+
teardownFuncs = append(teardownFuncs, base.SetUpGlobalTestMemoryWatermark(m, 512))
2828

2929
base.SkipPrometheusStatsRegistration = true
3030

db/database_collection.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,15 @@ package db
1010

1111
import (
1212
"context"
13+
"encoding/json"
1314
"fmt"
15+
"slices"
1416
"time"
1517

1618
"github.com/couchbase/sync_gateway/auth"
1719
"github.com/couchbase/sync_gateway/base"
1820
"github.com/couchbase/sync_gateway/channels"
21+
"github.com/google/uuid"
1922
)
2023

2124
// DatabaseCollection provides a representation of a single collection of a database.
@@ -59,6 +62,79 @@ func (c *DatabaseCollection) AllowConflicts() bool {
5962
return base.DefaultAllowConflicts
6063
}
6164

65+
// GetRawDocOpts controls what GetRawDoc fetches and whether the returned xattrs are redacted.
type GetRawDocOpts struct {
66+
IncludeDoc bool // If true, the document body will be returned as well as the metadata.
67+
// Redact enables redaction of the returned xattrs (sync data, global sync data and the user xattr).
Redact bool
68+
// RedactSalt is the salt passed to the redaction helpers. If empty while Redact is true, GetRawDoc
// generates a random one and stores it here. Setting it while Redact is false makes GetRawDoc return an error.
RedactSalt string
69+
}
70+
71+
// GetRawDoc returns the persisted version of a document, including its SG-related xattrs.
72+
func (c *DatabaseCollection) GetRawDoc(ctx context.Context, docID string, opts *GetRawDocOpts) (docBody json.RawMessage, xattrs map[string]json.RawMessage, err error) {
73+
if opts == nil {
74+
// default
75+
opts = &GetRawDocOpts{
76+
IncludeDoc: true,
77+
}
78+
}
79+
80+
if opts.Redact && opts.RedactSalt == "" {
81+
// generate a random salt if one is not provided
82+
opts.RedactSalt = uuid.New().String()
83+
} else if !opts.Redact && opts.RedactSalt != "" {
84+
return nil, nil, fmt.Errorf("RedactSalt provided to GetRawDoc but Redact is not enabled")
85+
}
86+
87+
// collect the set of xattrs to fetch that Sync Gateway is interested in.
88+
xattrKeys := slices.Clone(base.SyncGatewayRawDocXattrs)
89+
userXattrKey := c.dbCtx.Options.UserXattrKey
90+
if userXattrKey != "" {
91+
// we'll redact this later after fetching - it's still useful to know it was present even if we can't see the contents.
92+
xattrKeys = append(xattrKeys, userXattrKey)
93+
}
94+
95+
var xattrValuesBytes map[string][]byte
96+
if opts.IncludeDoc {
97+
docBody, xattrValuesBytes, _, err = c.dataStore.GetWithXattrs(ctx, docID, xattrKeys)
98+
} else {
99+
xattrValuesBytes, _, err = c.dataStore.GetXattrs(ctx, docID, xattrKeys)
100+
}
101+
if err != nil {
102+
return nil, nil, err
103+
}
104+
105+
// stamp all requested xattrKeys and populate with values where appropriate (ensures null values are present)
106+
xattrs = make(map[string]json.RawMessage, len(xattrValuesBytes))
107+
for _, k := range xattrKeys {
108+
if opts.Redact {
109+
switch k {
110+
case base.SyncXattrName:
111+
redactedV, err := RedactRawSyncData(xattrValuesBytes[k], opts.RedactSalt)
112+
if err != nil {
113+
return nil, nil, fmt.Errorf("couldn't redact sync data: %w", err)
114+
}
115+
xattrs[k] = redactedV
116+
case base.GlobalXattrName:
117+
redactedV, err := RedactRawGlobalSyncData(xattrValuesBytes[k], opts.RedactSalt)
118+
if err != nil {
119+
return nil, nil, fmt.Errorf("couldn't redact global sync data: %w", err)
120+
}
121+
xattrs[k] = redactedV
122+
case userXattrKey:
123+
// include the key but not the value so we can see _something_ was present.
124+
xattrs[k] = []byte(`"redacted"`)
125+
default:
126+
// no redaction for this key
127+
xattrs[k] = xattrValuesBytes[k]
128+
}
129+
} else {
130+
// redaction disabled
131+
xattrs[k] = xattrValuesBytes[k]
132+
}
133+
}
134+
135+
return docBody, xattrs, nil
136+
}
137+
62138
func (c *DatabaseCollection) GetCollectionDatastore() base.DataStore {
63139
return c.dataStore
64140
}

db/document.go

Lines changed: 71 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"context"
1414
"crypto/sha256"
1515
"encoding/base64"
16+
"encoding/json"
1617
"errors"
1718
"fmt"
1819
"math"
@@ -131,71 +132,105 @@ func (sd *SyncData) getCurrentChannels() base.Set {
131132
return ch
132133
}
133134

134-
func (sd *SyncData) HashRedact(salt string) SyncData {
135-
136-
// Creating a new SyncData with the redacted info. We copy all the information which stays the same and create new
137-
// items for the redacted data. The data to be redacted is populated below.
138-
redactedSyncData := SyncData{
139-
CurrentRev: sd.CurrentRev,
140-
NewestRev: sd.NewestRev,
141-
Flags: sd.Flags,
142-
Sequence: sd.Sequence,
143-
UnusedSequences: sd.UnusedSequences,
144-
RecentSequences: sd.RecentSequences,
145-
History: RevTree{},
146-
Channels: channels.ChannelMap{},
147-
Access: UserAccessMap{},
148-
RoleAccess: UserAccessMap{},
149-
Expiry: sd.Expiry,
150-
Cas: sd.Cas,
151-
Crc32c: sd.Crc32c,
152-
TombstonedAt: sd.TombstonedAt,
153-
Attachments: AttachmentsMeta{},
154-
}
155-
156-
// Populate and redact channels
135+
// RedactRawGlobalSyncData runs HashRedact on the given global sync data.
136+
func RedactRawGlobalSyncData(syncData []byte, redactSalt string) ([]byte, error) {
137+
if redactSalt == "" {
138+
return nil, fmt.Errorf("redact salt must be set")
139+
}
140+
141+
var gsd GlobalSyncData
142+
if err := json.Unmarshal(syncData, &gsd); err != nil {
143+
return nil, fmt.Errorf("couldn't unmarshal sync data: %w", err)
144+
}
145+
146+
if err := gsd.HashRedact(redactSalt); err != nil {
147+
return nil, fmt.Errorf("couldn't redact global sync data: %w", err)
148+
}
149+
150+
return json.Marshal(gsd)
151+
}
152+
153+
// HashRedact does in-place redaction of UserData inside GlobalSyncData, by hashing attachment names.
154+
func (gsd *GlobalSyncData) HashRedact(salt string) error {
155+
for k, v := range gsd.GlobalAttachments {
156+
gsd.GlobalAttachments[base.Sha1HashString(k, salt)] = v
157+
delete(gsd.GlobalAttachments, k)
158+
}
159+
return nil
160+
}
161+
162+
// RedactRawSyncData runs HashRedact on the given sync data.
163+
func RedactRawSyncData(syncData []byte, redactSalt string) ([]byte, error) {
164+
if redactSalt == "" {
165+
return nil, fmt.Errorf("redact salt must be set")
166+
}
167+
168+
var sd SyncDataAlias
169+
if err := json.Unmarshal(syncData, &sd); err != nil {
170+
return nil, fmt.Errorf("couldn't unmarshal sync data: %w", err)
171+
}
172+
173+
if err := sd.HashRedact(redactSalt); err != nil {
174+
return nil, fmt.Errorf("couldn't redact sync data: %w", err)
175+
}
176+
177+
return json.Marshal(sd)
178+
}
179+
180+
// HashRedact does in-place redaction of UserData inside SyncData, by hashing channel names, user access, role access, and attachment names.
181+
func (sd *SyncDataAlias) HashRedact(salt string) error {
182+
183+
// Redact channel names
157184
for k, v := range sd.Channels {
158-
redactedSyncData.Channels[base.Sha1HashString(k, salt)] = v
185+
sd.Channels[base.Sha1HashString(k, salt)] = v
186+
delete(sd.Channels, k)
187+
}
188+
for i, v := range sd.ChannelSet {
189+
sd.ChannelSet[i].Name = base.Sha1HashString(v.Name, salt)
190+
}
191+
for i, v := range sd.ChannelSetHistory {
192+
sd.ChannelSetHistory[i].Name = base.Sha1HashString(v.Name, salt)
159193
}
160194

161-
// Populate and redact history. This is done as it also includes channel names
195+
// Redact history. This is done as it also includes channel names
162196
for k, revInfo := range sd.History {
163-
164197
if revInfo.Channels != nil {
165-
redactedChannels := base.Set{}
198+
redactedChannels := make(base.Set, len(revInfo.Channels))
166199
for existingChanKey := range revInfo.Channels {
167200
redactedChannels.Add(base.Sha1HashString(existingChanKey, salt))
168201
}
169202
revInfo.Channels = redactedChannels
170203
}
171-
172-
redactedSyncData.History[k] = revInfo
204+
sd.History[k] = revInfo
173205
}
174206

175-
// Populate and redact user access
207+
// Redact user access
176208
for k, v := range sd.Access {
177209
accessTimerSet := map[string]channels.VbSequence{}
178210
for channelName, vbStats := range v {
179211
accessTimerSet[base.Sha1HashString(channelName, salt)] = vbStats
180212
}
181-
redactedSyncData.Access[base.Sha1HashString(k, salt)] = accessTimerSet
213+
sd.Access[base.Sha1HashString(k, salt)] = accessTimerSet
214+
delete(sd.Access, k)
182215
}
183216

184-
// Populate and redact user role access
217+
// Redact user role access
185218
for k, v := range sd.RoleAccess {
186219
accessTimerSet := map[string]channels.VbSequence{}
187220
for channelName, vbStats := range v {
188221
accessTimerSet[base.Sha1HashString(channelName, salt)] = vbStats
189222
}
190-
redactedSyncData.RoleAccess[base.Sha1HashString(k, salt)] = accessTimerSet
223+
sd.RoleAccess[base.Sha1HashString(k, salt)] = accessTimerSet
224+
delete(sd.RoleAccess, k)
191225
}
192226

193-
// Populate and redact attachment names
227+
// Redact attachment names (pre-4.0 attachment location)
194228
for k, v := range sd.Attachments {
195-
redactedSyncData.Attachments[base.Sha1HashString(k, salt)] = v
229+
sd.Attachments[base.Sha1HashString(k, salt)] = v
230+
delete(sd.Attachments, k)
196231
}
197232

198-
return redactedSyncData
233+
return nil
199234
}
200235

201236
// A document as stored in Couchbase. Contains the body of the current revision plus metadata.

0 commit comments

Comments
 (0)