Skip to content

Commit 763ab5d

Browse files
authored
fix(bigquery): dependency detection on proto conversion (#8566)
1 parent 8e53787 commit 763ab5d

File tree

2 files changed

+192
-2
lines changed

2 files changed

+192
-2
lines changed

bigquery/storage/managedwriter/adapt/protoconversion.go

+23-2
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ func storageSchemaToDescriptorInternal(inSchema *storagepb.TableSchema, scope st
185185
if foundDesc != nil {
186186
// check to see if we already have this in current dependency list
187187
haveDep := false
188-
for _, curDep := range deps {
189-
if foundDesc.ParentFile().FullName() == curDep.FullName() {
188+
for _, dep := range deps {
189+
if messageDependsOnFile(foundDesc, dep) {
190190
haveDep = true
191191
break
192192
}
@@ -279,6 +279,27 @@ func storageSchemaToDescriptorInternal(inSchema *storagepb.TableSchema, scope st
279279
return found.(protoreflect.MessageDescriptor), nil
280280
}
281281

282+
// messageDependsOnFile checks if the given message descriptor already belongs to the file descriptor.
283+
// To check for that, first we check if the message descriptor parent file is the same as the file descriptor.
284+
// If not, check if the message descriptor belongs is contained as a child of the file descriptor.
285+
func messageDependsOnFile(msg protoreflect.MessageDescriptor, file protoreflect.FileDescriptor) bool {
286+
parentFile := msg.ParentFile()
287+
parentFileName := parentFile.FullName()
288+
if parentFileName != "" {
289+
if parentFileName == file.FullName() {
290+
return true
291+
}
292+
}
293+
fileMessages := file.Messages()
294+
for i := 0; i < fileMessages.Len(); i++ {
295+
childMsg := fileMessages.Get(i)
296+
if msg.FullName() == childMsg.FullName() {
297+
return true
298+
}
299+
}
300+
return false
301+
}
302+
282303
// tableFieldSchemaToFieldDescriptorProto builds individual field descriptors for a proto message.
283304
//
284305
// For proto3, in cases where the mode is nullable we use the well known wrapper types.

bigquery/storage/managedwriter/adapt/protoconversion_test.go

+169
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,175 @@ func TestSchemaToProtoConversion(t *testing.T) {
360360
},
361361
},
362362
},
363+
{
364+
description: "nested with reused submessage in different levels",
365+
bq: &storagepb.TableSchema{
366+
Fields: []*storagepb.TableFieldSchema{
367+
{
368+
Name: "reused_inner_struct",
369+
Type: storagepb.TableFieldSchema_STRUCT,
370+
Mode: storagepb.TableFieldSchema_REQUIRED,
371+
Fields: []*storagepb.TableFieldSchema{
372+
{
373+
Name: "leaf",
374+
Type: storagepb.TableFieldSchema_STRING,
375+
Mode: storagepb.TableFieldSchema_REQUIRED,
376+
},
377+
},
378+
},
379+
{
380+
Name: "outer_struct",
381+
Type: storagepb.TableFieldSchema_STRUCT,
382+
Mode: storagepb.TableFieldSchema_REQUIRED,
383+
Fields: []*storagepb.TableFieldSchema{
384+
{
385+
Name: "another_inner_struct",
386+
Type: storagepb.TableFieldSchema_STRUCT,
387+
Mode: storagepb.TableFieldSchema_REQUIRED,
388+
Fields: []*storagepb.TableFieldSchema{
389+
{
390+
Name: "another_leaf",
391+
Type: storagepb.TableFieldSchema_STRING,
392+
Mode: storagepb.TableFieldSchema_REQUIRED,
393+
},
394+
},
395+
},
396+
{
397+
Name: "reused_inner_struct_one",
398+
Type: storagepb.TableFieldSchema_STRUCT,
399+
Mode: storagepb.TableFieldSchema_REQUIRED,
400+
Fields: []*storagepb.TableFieldSchema{
401+
{
402+
Name: "leaf",
403+
Type: storagepb.TableFieldSchema_STRING,
404+
Mode: storagepb.TableFieldSchema_REQUIRED,
405+
},
406+
},
407+
},
408+
{
409+
Name: "reused_inner_struct_two",
410+
Type: storagepb.TableFieldSchema_STRUCT,
411+
Mode: storagepb.TableFieldSchema_REQUIRED,
412+
Fields: []*storagepb.TableFieldSchema{
413+
{
414+
Name: "leaf",
415+
Type: storagepb.TableFieldSchema_STRING,
416+
Mode: storagepb.TableFieldSchema_REQUIRED,
417+
},
418+
},
419+
},
420+
},
421+
},
422+
},
423+
},
424+
wantProto2: &descriptorpb.DescriptorProto{
425+
Name: proto.String("root"),
426+
Field: []*descriptorpb.FieldDescriptorProto{
427+
{
428+
Name: proto.String("reused_inner_struct"),
429+
Number: proto.Int32(1),
430+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
431+
TypeName: proto.String(".root__reused_inner_struct"),
432+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
433+
},
434+
{
435+
Name: proto.String("outer_struct"),
436+
Number: proto.Int32(2),
437+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
438+
TypeName: proto.String(".root__outer_struct"),
439+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
440+
},
441+
},
442+
},
443+
wantProto2Normalized: &descriptorpb.DescriptorProto{
444+
Name: proto.String("root"),
445+
Field: []*descriptorpb.FieldDescriptorProto{
446+
{
447+
Name: proto.String("reused_inner_struct"),
448+
Number: proto.Int32(1),
449+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
450+
TypeName: proto.String("root__reused_inner_struct"),
451+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
452+
},
453+
{
454+
Name: proto.String("outer_struct"),
455+
Number: proto.Int32(2),
456+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
457+
TypeName: proto.String("root__outer_struct"),
458+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
459+
},
460+
},
461+
NestedType: []*descriptorpb.DescriptorProto{
462+
{
463+
Name: proto.String("root__reused_inner_struct"),
464+
Field: []*descriptorpb.FieldDescriptorProto{
465+
{
466+
Name: proto.String("leaf"),
467+
Number: proto.Int32(1),
468+
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
469+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
470+
},
471+
},
472+
},
473+
{
474+
Name: proto.String("root__outer_struct__another_inner_struct"),
475+
Field: []*descriptorpb.FieldDescriptorProto{
476+
{
477+
Name: proto.String("another_leaf"),
478+
Number: proto.Int32(1),
479+
Type: descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum(),
480+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
481+
},
482+
},
483+
},
484+
{
485+
Name: proto.String("root__outer_struct"),
486+
Field: []*descriptorpb.FieldDescriptorProto{
487+
{
488+
Name: proto.String("another_inner_struct"),
489+
Number: proto.Int32(1),
490+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
491+
TypeName: proto.String("root__outer_struct__another_inner_struct"),
492+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
493+
},
494+
{
495+
Name: proto.String("reused_inner_struct_one"),
496+
Number: proto.Int32(2),
497+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
498+
TypeName: proto.String("root__reused_inner_struct"),
499+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
500+
},
501+
{
502+
Name: proto.String("reused_inner_struct_two"),
503+
Number: proto.Int32(3),
504+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
505+
TypeName: proto.String("root__reused_inner_struct"),
506+
Label: descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum(),
507+
},
508+
},
509+
},
510+
},
511+
},
512+
wantProto3: &descriptorpb.DescriptorProto{
513+
Name: proto.String("root"),
514+
Field: []*descriptorpb.FieldDescriptorProto{
515+
{
516+
Name: proto.String("reused_inner_struct"),
517+
Number: proto.Int32(1),
518+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
519+
TypeName: proto.String(".root__reused_inner_struct"),
520+
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
521+
},
522+
{
523+
Name: proto.String("outer_struct"),
524+
Number: proto.Int32(2),
525+
Type: descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum(),
526+
TypeName: proto.String(".root__outer_struct"),
527+
Label: descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum(),
528+
},
529+
},
530+
},
531+
},
363532
{
364533
description: "multiple nesting levels",
365534
bq: &storagepb.TableSchema{

0 commit comments

Comments
 (0)