@@ -29,7 +29,10 @@ option php_namespace = "Google\\Cloud\\Dlp\\V2";
29
29
30
30
// Type of information detected by the API.
31
31
message InfoType {
32
- // Name of the information type.
32
+ // Name of the information type. Either a name of your choosing when
33
+ // creating a CustomInfoType, or one of the names listed
34
+ // at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
35
+ // a built-in type.
33
36
string name = 1 ;
34
37
}
35
38
@@ -86,13 +89,13 @@ message CustomInfoType {
86
89
// These types of transformations are
87
90
// those that perform pseudonymization, thereby producing a "surrogate" as
88
91
// output. This should be used in conjunction with a field on the
89
- // transformation such as `surrogate_info_type`. This custom info type does
92
+ // transformation such as `surrogate_info_type`. This CustomInfoType does
90
93
// not support the use of `detection_rules`.
91
94
message SurrogateType {
92
95
93
96
}
94
97
95
- // Rule for modifying a custom info type to alter behavior under certain
98
+ // Rule for modifying a CustomInfoType to alter behavior under certain
96
99
// circumstances, depending on the specific details of the rule. Not supported
97
100
// for the `surrogate_type` custom info type.
98
101
message DetectionRule {
@@ -125,10 +128,10 @@ message CustomInfoType {
125
128
}
126
129
}
127
130
128
- // Detection rule that adjusts the likelihood of findings within a certain
131
+ // The rule that adjusts the likelihood of findings within a certain
129
132
// proximity of hotwords.
130
133
message HotwordRule {
131
- // Regex pattern defining what qualifies as a hotword.
134
+ // Regular expression pattern defining what qualifies as a hotword.
132
135
Regex hotword_regex = 1 ;
133
136
134
137
// Proximity of the finding within which the entire hotword must reside.
@@ -151,29 +154,30 @@ message CustomInfoType {
151
154
}
152
155
}
153
156
154
- // Info type configuration. All custom info types must have configurations
155
- // that do not conflict with built-in info types or other custom info types .
157
+ // All CustomInfoTypes must have a name
158
+ // that does not conflict with built-in InfoTypes or other CustomInfoTypes.
156
159
InfoType info_type = 1 ;
157
160
158
- // Likelihood to return for this custom info type . This base value can be
161
+ // Likelihood to return for this CustomInfoType. This base value can be
159
162
// altered by a detection rule if the finding meets the criteria specified by
160
163
// the rule. Defaults to `VERY_LIKELY` if not specified.
161
164
Likelihood likelihood = 6 ;
162
165
163
166
oneof type {
164
- // Dictionary-based custom info type .
167
+ // A list of phrases to detect as a CustomInfoType.
165
168
Dictionary dictionary = 2 ;
166
169
167
- // Regex-based custom info type .
170
+ // Regular expression based CustomInfoType.
168
171
Regex regex = 3 ;
169
172
170
- // Surrogate info type.
173
+ // Message for detecting output from deidentification transformations that
174
+ // support reversing.
171
175
SurrogateType surrogate_type = 4 ;
172
176
}
173
177
174
- // Set of detection rules to apply to all findings of this custom info type .
178
+ // Set of detection rules to apply to all findings of this CustomInfoType.
175
179
// Rules are applied in order that they are specified. Not supported for the
176
- // `surrogate_type` custom info type .
180
+ // `surrogate_type` CustomInfoType.
177
181
repeated DetectionRule detection_rules = 7 ;
178
182
}
179
183
@@ -223,6 +227,20 @@ message CloudStorageOptions {
223
227
string url = 1 ;
224
228
}
225
229
230
+ // How to sample bytes if not all bytes are scanned. Meaningful only when used
231
+ // in conjunction with bytes_limit_per_file. If not specified, scanning
232
+ // starts from the top.
233
+ enum SampleMethod {
234
+ SAMPLE_METHOD_UNSPECIFIED = 0 ;
235
+
236
+ // Scan from the top (default).
237
+ TOP = 1 ;
238
+
239
+ // For each file larger than bytes_limit_per_file, randomly pick the offset
240
+ // to start scanning. The scanned bytes are contiguous.
241
+ RANDOM_START = 2 ;
242
+ }
243
+
226
244
FileSet file_set = 1 ;
227
245
228
246
// Max number of bytes to scan from a file. If a scanned file's size is bigger
@@ -233,9 +251,16 @@ message CloudStorageOptions {
233
251
// If empty, all files are scanned and available data format processors
234
252
// are applied.
235
253
repeated FileType file_types = 5 ;
254
+
255
+ SampleMethod sample_method = 6 ;
256
+
257
+ // Limits the number of files to scan to this percentage of the input FileSet.
258
+ // Number of files scanned is rounded down. Must be between 0 and 100,
259
+ // inclusive. Both 0 and 100 mean no limit. Defaults to 0.
260
+ int32 files_limit_percent = 7 ;
236
261
}
237
262
238
- // Message representing a path in Cloud Storage.
263
+ // Message representing a single file or path in Cloud Storage.
239
264
message CloudStoragePath {
240
265
// A url representing a file or path (no wildcards) in Cloud Storage.
241
266
// Example: gs://[BUCKET_NAME]/dictionary.txt
@@ -244,6 +269,19 @@ message CloudStoragePath {
244
269
245
270
// Options defining BigQuery table and row identifiers.
246
271
message BigQueryOptions {
272
+ // How to sample rows if not all rows are scanned. Meaningful only when used
273
+ // in conjunction with rows_limit. If not specified, scanning starts
274
+ // from the top.
275
+ enum SampleMethod {
276
+ SAMPLE_METHOD_UNSPECIFIED = 0 ;
277
+
278
+ // Scan from the top (default).
279
+ TOP = 1 ;
280
+
281
+ // Randomly pick the row to start scanning. The scanned rows are contiguous.
282
+ RANDOM_START = 2 ;
283
+ }
284
+
247
285
// Complete BigQuery table reference.
248
286
BigQueryTable table_reference = 1 ;
249
287
@@ -255,6 +293,8 @@ message BigQueryOptions {
255
293
// rest of the rows are omitted. If not set, or if set to 0, all rows will be
256
294
// scanned. Cannot be used in conjunction with TimespanConfig.
257
295
int64 rows_limit = 3 ;
296
+
297
+ SampleMethod sample_method = 4 ;
258
298
}
259
299
260
300
// Shared message indicating Cloud storage type.
@@ -402,7 +442,7 @@ message EntityId {
402
442
// Categorization of results based on how likely they are to represent a match,
403
443
// based on the number of elements they contain which imply a match.
404
444
enum Likelihood {
405
- // Default value; information with all likelihoods is included .
445
+ // Default value; same as POSSIBLE.
406
446
LIKELIHOOD_UNSPECIFIED = 0 ;
407
447
408
448
// Few matching elements.
0 commit comments