44
44
)
45
45
from vertexai .rag .utils .resources import (
46
46
JiraSource ,
47
+ LayoutParserConfig ,
47
48
RagCorpus ,
48
49
RagFile ,
49
50
RagVectorDbConfig ,
@@ -395,6 +396,7 @@ def import_files(
395
396
timeout : int = 600 ,
396
397
max_embedding_requests_per_min : int = 1000 ,
397
398
partial_failures_sink : Optional [str ] = None ,
399
+ parser : Optional [LayoutParserConfig ] = None ,
398
400
) -> ImportRagFilesResponse :
399
401
"""
400
402
Import files to an existing RagCorpus, wait until completion.
@@ -473,6 +475,17 @@ def import_files(
473
475
# Return the number of imported RagFiles after completion.
474
476
print(response.imported_rag_files_count)
475
477
478
+ # Document AI Layout Parser example.
479
+ parser = LayoutParserConfig(
480
+ processor_name="projects/my-project/locations/us-central1/processors/my-processor-id",
481
+ max_parsing_requests_per_min=120,
482
+ )
483
+ response = rag.import_files(
484
+ corpus_name="projects/my-project/locations/us-central1/ragCorpora/my-corpus-1",
485
+ paths=paths,
486
+ parser=parser,
487
+ )
488
+
476
489
```
477
490
Args:
478
491
corpus_name: The name of the RagCorpus resource into which to import files.
@@ -504,6 +517,9 @@ def import_files(
504
517
exist - if it does not exist, it will be created. If it does exist,
505
518
the schema will be checked and the partial failures will be appended
506
519
to the table.
520
+ parser: Document parser to use. Should be either None (default parser),
521
+ or a LayoutParserConfig (to parse documents using a Document AI
522
+ Layout Parser processor).
507
523
Returns:
508
524
ImportRagFilesResponse.
509
525
"""
@@ -519,6 +535,7 @@ def import_files(
519
535
transformation_config = transformation_config ,
520
536
max_embedding_requests_per_min = max_embedding_requests_per_min ,
521
537
partial_failures_sink = partial_failures_sink ,
538
+ parser = parser ,
522
539
)
523
540
client = _gapic_utils .create_rag_data_service_client ()
524
541
try :
@@ -536,6 +553,7 @@ async def import_files_async(
536
553
transformation_config : Optional [TransformationConfig ] = None ,
537
554
max_embedding_requests_per_min : int = 1000 ,
538
555
partial_failures_sink : Optional [str ] = None ,
556
+ parser : Optional [LayoutParserConfig ] = None ,
539
557
) -> operation_async .AsyncOperation :
540
558
"""
541
559
Import files to an existing RagCorpus asynchronously.
@@ -612,6 +630,17 @@ async def import_files_async(
612
630
share_point_sources=[sharepoint_query],
613
631
)
614
632
633
+ # Document AI Layout Parser example.
634
+ parser = LayoutParserConfig(
635
+ processor_name="projects/my-project/locations/us-central1/processors/my-processor-id",
636
+ max_parsing_requests_per_min=120,
637
+ )
638
+ response = await rag.import_files_async(
639
+ corpus_name="projects/my-project/locations/us-central1/ragCorpora/my-corpus-1",
640
+ paths=paths,
641
+ parser=parser,
642
+ )
643
+
615
644
# Get the result.
616
645
await response.result()
617
646
@@ -645,6 +674,9 @@ async def import_files_async(
645
674
exist - if it does not exist, it will be created. If it does exist,
646
675
the schema will be checked and the partial failures will be appended
647
676
to the table.
677
+ parser: Document parser to use. Should be either None (default parser),
678
+ or a LayoutParserConfig (to parse documents using a Document AI
679
+ Layout Parser processor).
648
680
Returns:
649
681
operation_async.AsyncOperation.
650
682
"""
@@ -660,6 +692,7 @@ async def import_files_async(
660
692
transformation_config = transformation_config ,
661
693
max_embedding_requests_per_min = max_embedding_requests_per_min ,
662
694
partial_failures_sink = partial_failures_sink ,
695
+ parser = parser ,
663
696
)
664
697
async_client = _gapic_utils .create_rag_data_service_async_client ()
665
698
try :
0 commit comments