Skip to content

Commit 88c6a6a

Browse files
yinghsienwu authored and copybara-github committed
fix: Generalize RAG files import from Google Drive
PiperOrigin-RevId: 639184120
1 parent ba65828 commit 88c6a6a

File tree

3 files changed

+10
-5
lines changed

3 files changed

+10
-5
lines changed

tests/unit/vertex_rag/test_rag_constants.py

+3
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,9 @@
9191
TEST_DRIVE_FOLDER = (
9292
f"https://drive.google.com/corp/drive/folders/{TEST_DRIVE_FOLDER_ID}"
9393
)
94+
TEST_DRIVE_FOLDER_2 = (
95+
f"https://drive.google.com/drive/folders/{TEST_DRIVE_FOLDER_ID}?resourcekey=0-eiOT3"
96+
)
9497
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER = ImportRagFilesConfig()
9598
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER.google_drive_source.resource_ids = [
9699
GoogleDriveSource.ResourceId(

tests/unit/vertex_rag/test_rag_data.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -374,11 +374,11 @@ def test_prepare_import_files_request_list_gcs_uris(self):
374374
)
375375
import_files_request_eq(request, tc.TEST_IMPORT_REQUEST_GCS)
376376

377-
def test_prepare_import_files_request_drive_folders(self):
378-
paths = [tc.TEST_DRIVE_FOLDER]
377+
@pytest.mark.parametrize("path", [tc.TEST_DRIVE_FOLDER, tc.TEST_DRIVE_FOLDER_2])
378+
def test_prepare_import_files_request_drive_folders(self, path):
379379
request = prepare_import_files_request(
380380
corpus_name=tc.TEST_RAG_CORPUS_RESOURCE_NAME,
381-
paths=paths,
381+
paths=[path],
382382
chunk_size=tc.TEST_CHUNK_SIZE,
383383
chunk_overlap=tc.TEST_CHUNK_OVERLAP,
384384
)

vertexai/preview/rag/utils/_gapic_utils.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -97,10 +97,12 @@ def convert_path_to_resource_id(
9797
# Google Drive source
9898
path_list = path.split("/")
9999
if "file" in path_list:
100-
resource_id = path_list[5]
100+
index = path_list.index("file") + 2
101+
resource_id = path_list[index].split("?")[0]
101102
resource_type = GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FILE
102103
elif "folders" in path_list:
103-
resource_id = path_list[6]
104+
index = path_list.index("folders") + 1
105+
resource_id = path_list[index].split("?")[0]
104106
resource_type = (
105107
GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FOLDER
106108
)

0 commit comments

Comments
 (0)