45
45
from vertexai .preview .rag .utils .resources import (
46
46
EmbeddingModelConfig ,
47
47
JiraSource ,
48
+ LayoutParserConfig ,
48
49
Pinecone ,
49
50
RagCorpus ,
50
51
RagFile ,
@@ -466,6 +467,7 @@ def import_files(
466
467
max_embedding_requests_per_min : int = 1000 ,
467
468
use_advanced_pdf_parsing : Optional [bool ] = False ,
468
469
partial_failures_sink : Optional [str ] = None ,
470
+ layout_parser : Optional [LayoutParserConfig ] = None ,
469
471
) -> ImportRagFilesResponse :
470
472
"""
471
473
Import files to an existing RagCorpus, wait until completion.
@@ -581,13 +583,21 @@ def import_files(
581
583
exist - if it does not exist, it will be created. If it does exist,
582
584
the schema will be checked and the partial failures will be appended
583
585
to the table.
586
+ layout_parser: Configuration for the Document AI Layout Parser Processor
587
+ to use for document parsing. Optional.
588
+ If not None, `use_advanced_pdf_parsing` must be False.
584
589
Returns:
585
590
ImportRagFilesResponse.
586
591
"""
587
592
if source is not None and paths is not None :
588
593
raise ValueError ("Only one of source or paths must be passed in at a time" )
589
594
if source is None and paths is None :
590
595
raise ValueError ("One of source or paths must be passed in" )
596
+ if use_advanced_pdf_parsing and layout_parser is not None :
597
+ raise ValueError (
598
+ "Only one of use_advanced_pdf_parsing or layout_parser may be "
599
+ "passed in at a time"
600
+ )
591
601
corpus_name = _gapic_utils .get_corpus_name (corpus_name )
592
602
request = _gapic_utils .prepare_import_files_request (
593
603
corpus_name = corpus_name ,
@@ -599,6 +609,7 @@ def import_files(
599
609
max_embedding_requests_per_min = max_embedding_requests_per_min ,
600
610
use_advanced_pdf_parsing = use_advanced_pdf_parsing ,
601
611
partial_failures_sink = partial_failures_sink ,
612
+ layout_parser = layout_parser ,
602
613
)
603
614
client = _gapic_utils .create_rag_data_service_client ()
604
615
try :
@@ -619,6 +630,7 @@ async def import_files_async(
619
630
max_embedding_requests_per_min : int = 1000 ,
620
631
use_advanced_pdf_parsing : Optional [bool ] = False ,
621
632
partial_failures_sink : Optional [str ] = None ,
633
+ layout_parser : Optional [LayoutParserConfig ] = None ,
622
634
) -> operation_async .AsyncOperation :
623
635
"""
624
636
Import files to an existing RagCorpus asynchronously.
@@ -734,13 +746,21 @@ async def import_files_async(
734
746
exist - if it does not exist, it will be created. If it does exist,
735
747
the schema will be checked and the partial failures will be appended
736
748
to the table.
749
+ layout_parser: Configuration for the Document AI Layout Parser Processor
750
+ to use for document parsing. Optional.
751
+ If not None, `use_advanced_pdf_parsing` must be False.
737
752
Returns:
738
753
operation_async.AsyncOperation.
739
754
"""
740
755
if source is not None and paths is not None :
741
756
raise ValueError ("Only one of source or paths must be passed in at a time" )
742
757
if source is None and paths is None :
743
758
raise ValueError ("One of source or paths must be passed in" )
759
+ if use_advanced_pdf_parsing and layout_parser is not None :
760
+ raise ValueError (
761
+ "Only one of use_advanced_pdf_parsing or layout_parser may be "
762
+ "passed in at a time"
763
+ )
744
764
corpus_name = _gapic_utils .get_corpus_name (corpus_name )
745
765
request = _gapic_utils .prepare_import_files_request (
746
766
corpus_name = corpus_name ,
@@ -752,6 +772,7 @@ async def import_files_async(
752
772
max_embedding_requests_per_min = max_embedding_requests_per_min ,
753
773
use_advanced_pdf_parsing = use_advanced_pdf_parsing ,
754
774
partial_failures_sink = partial_failures_sink ,
775
+ layout_parser = layout_parser ,
755
776
)
756
777
async_client = _gapic_utils .create_rag_data_service_async_client ()
757
778
try :
0 commit comments