21
21
prepare_import_files_request ,
22
22
set_embedding_model_config ,
23
23
)
24
+ from vertexai .rag .utils .resources import (
25
+ ChunkingConfig ,
26
+ TransformationConfig ,
27
+ )
24
28
from google .cloud .aiplatform_v1beta1 import (
25
29
VertexRagDataServiceAsyncClient ,
26
30
VertexRagDataServiceClient ,
@@ -327,6 +331,18 @@ def list_rag_files_pager_mock():
327
331
yield list_rag_files_pager_mock
328
332
329
333
334
def create_transformation_config(
    chunk_size: int = test_rag_constants_preview.TEST_CHUNK_SIZE,
    chunk_overlap: int = test_rag_constants_preview.TEST_CHUNK_OVERLAP,
) -> TransformationConfig:
    """Build a ``TransformationConfig`` wrapping a single ``ChunkingConfig``.

    Shared by the ``prepare_import_files_request`` tests so each one can pass
    a ``transformation_config`` argument without repeating the nested
    construction.

    Args:
        chunk_size: Value forwarded to ``ChunkingConfig.chunk_size``.
            Defaults to ``test_rag_constants_preview.TEST_CHUNK_SIZE``.
        chunk_overlap: Value forwarded to ``ChunkingConfig.chunk_overlap``.
            Defaults to ``test_rag_constants_preview.TEST_CHUNK_OVERLAP``.

    Returns:
        A ``TransformationConfig`` whose ``chunking_config`` carries the given
        chunk size and overlap.
    """
    return TransformationConfig(
        chunking_config=ChunkingConfig(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        ),
    )
344
+
345
+
330
346
def rag_corpus_eq (returned_corpus , expected_corpus ):
331
347
assert returned_corpus .name == expected_corpus .name
332
348
assert returned_corpus .display_name == expected_corpus .display_name
@@ -363,6 +379,10 @@ def import_files_request_eq(returned_request, expected_request):
363
379
returned_request .import_rag_files_config .rag_file_parsing_config
364
380
== expected_request .import_rag_files_config .rag_file_parsing_config
365
381
)
382
+ assert (
383
+ returned_request .import_rag_files_config .rag_file_transformation_config
384
+ == expected_request .import_rag_files_config .rag_file_transformation_config
385
+ )
366
386
367
387
368
388
@pytest .mark .usefixtures ("google_auth_mock" )
@@ -795,6 +815,17 @@ def test_delete_file_failure(self):
795
815
e .match ("Failed in RagFile deletion due to" )
796
816
797
817
def test_prepare_import_files_request_list_gcs_uris(self):
    """Check the import request built from a list holding one GCS path.

    Calls ``prepare_import_files_request`` with the test corpus name, the
    GCS path list, and the default transformation config from
    ``create_transformation_config()``, then compares the result against
    ``TEST_IMPORT_REQUEST_GCS`` via ``import_files_request_eq``.
    """
    paths = [test_rag_constants_preview.TEST_GCS_PATH]
    request = prepare_import_files_request(
        corpus_name=test_rag_constants_preview.TEST_RAG_CORPUS_RESOURCE_NAME,
        paths=paths,
        transformation_config=create_transformation_config(),
    )
    import_files_request_eq(
        request, test_rag_constants_preview.TEST_IMPORT_REQUEST_GCS
    )
827
+
828
+ def test_prepare_import_files_request_list_gcs_uris_no_transformation_config (self ):
798
829
paths = [test_rag_constants_preview .TEST_GCS_PATH ]
799
830
request = prepare_import_files_request (
800
831
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
@@ -817,8 +848,7 @@ def test_prepare_import_files_request_drive_folders(self, path):
817
848
request = prepare_import_files_request (
818
849
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
819
850
paths = [path ],
820
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
821
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
851
+ transformation_config = create_transformation_config (),
822
852
)
823
853
import_files_request_eq (
824
854
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_DRIVE_FOLDER
@@ -835,8 +865,7 @@ def test_prepare_import_files_request_drive_folders_with_pdf_parsing(self, path)
835
865
request = prepare_import_files_request (
836
866
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
837
867
paths = [path ],
838
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
839
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
868
+ transformation_config = create_transformation_config (),
840
869
use_advanced_pdf_parsing = True ,
841
870
)
842
871
import_files_request_eq (
@@ -848,8 +877,7 @@ def test_prepare_import_files_request_drive_files(self):
848
877
request = prepare_import_files_request (
849
878
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
850
879
paths = paths ,
851
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
852
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
880
+ transformation_config = create_transformation_config (),
853
881
max_embedding_requests_per_min = 800 ,
854
882
)
855
883
import_files_request_eq (
@@ -862,8 +890,7 @@ def test_prepare_import_files_request_invalid_drive_path(self):
862
890
prepare_import_files_request (
863
891
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
864
892
paths = paths ,
865
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
866
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
893
+ transformation_config = create_transformation_config (),
867
894
)
868
895
e .match ("is not a valid Google Drive url" )
869
896
@@ -873,17 +900,15 @@ def test_prepare_import_files_request_invalid_path(self):
873
900
prepare_import_files_request (
874
901
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
875
902
paths = paths ,
876
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
877
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
903
+ transformation_config = create_transformation_config (),
878
904
)
879
905
e .match ("path must be a Google Cloud Storage uri or a Google Drive url" )
880
906
881
907
def test_prepare_import_files_request_slack_source (self ):
882
908
request = prepare_import_files_request (
883
909
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
884
910
source = test_rag_constants_preview .TEST_SLACK_SOURCE ,
885
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
886
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
911
+ transformation_config = create_transformation_config (),
887
912
)
888
913
import_files_request_eq (
889
914
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_SLACK_SOURCE
@@ -893,8 +918,7 @@ def test_prepare_import_files_request_jira_source(self):
893
918
request = prepare_import_files_request (
894
919
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
895
920
source = test_rag_constants_preview .TEST_JIRA_SOURCE ,
896
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
897
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
921
+ transformation_config = create_transformation_config (),
898
922
)
899
923
import_files_request_eq (
900
924
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_JIRA_SOURCE
@@ -904,8 +928,7 @@ def test_prepare_import_files_request_sharepoint_source(self):
904
928
request = prepare_import_files_request (
905
929
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
906
930
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE ,
907
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
908
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
931
+ transformation_config = create_transformation_config (),
909
932
)
910
933
import_files_request_eq (
911
934
request , test_rag_constants_preview .TEST_IMPORT_REQUEST_SHARE_POINT_SOURCE
@@ -916,8 +939,7 @@ def test_prepare_import_files_request_sharepoint_source_2_drives(self):
916
939
prepare_import_files_request (
917
940
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
918
941
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_2_DRIVES ,
919
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
920
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
942
+ transformation_config = create_transformation_config (),
921
943
)
922
944
e .match ("drive_name and drive_id cannot both be set." )
923
945
@@ -926,8 +948,7 @@ def test_prepare_import_files_request_sharepoint_source_2_folders(self):
926
948
prepare_import_files_request (
927
949
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
928
950
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_2_FOLDERS ,
929
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
930
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
951
+ transformation_config = create_transformation_config (),
931
952
)
932
953
e .match ("sharepoint_folder_path and sharepoint_folder_id cannot both be set." )
933
954
@@ -936,17 +957,15 @@ def test_prepare_import_files_request_sharepoint_source_no_drives(self):
936
957
prepare_import_files_request (
937
958
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
938
959
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_NO_DRIVES ,
939
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
940
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
960
+ transformation_config = create_transformation_config (),
941
961
)
942
962
e .match ("Either drive_name and drive_id must be set." )
943
963
944
964
def test_prepare_import_files_request_sharepoint_source_no_folders (self ):
945
965
request = prepare_import_files_request (
946
966
corpus_name = test_rag_constants_preview .TEST_RAG_CORPUS_RESOURCE_NAME ,
947
967
source = test_rag_constants_preview .TEST_SHARE_POINT_SOURCE_NO_FOLDERS ,
948
- chunk_size = test_rag_constants_preview .TEST_CHUNK_SIZE ,
949
- chunk_overlap = test_rag_constants_preview .TEST_CHUNK_OVERLAP ,
968
+ transformation_config = create_transformation_config (),
950
969
)
951
970
import_files_request_eq (
952
971
request ,
0 commit comments