From 4fcc49b5e46e90301a8797ac027ddaa16c68a1d4 Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Fri, 6 Jun 2025 10:30:07 +0800 Subject: [PATCH 1/7] [feat](param-refactor)Rename FS v2 to v1, remove legacy FS, and integrate new Storage/Azure features This pull request includes the following updates: Rename FS v2 to v1 The current FS v2 implementation has been renamed to FS v1, making it the default and primary file system implementation. Remove Legacy FS All code and dependencies related to the legacy FS (the old v1 implementation) have been completely removed. Repository Cleanup Dropped all compatibility logic for the old FS. The Repository layer now exclusively uses the new FS and Storage interfaces. Deprecated configuration entries, injection code, and transitional hooks have been removed. New Storage and Azure Feature Integration Added support for new Azure file system features Improved integration between the Storage layer and Repository for better scalability. --- .../org/apache/doris/analysis/LoadStmt.java | 2 +- .../apache/doris/backup/BackupHandler.java | 41 +- .../org/apache/doris/backup/Repository.java | 42 +- .../apache/doris/backup/RepositoryMgr.java | 4 +- .../apache/doris/catalog/AzureResource.java | 5 +- .../doris/catalog/HdfsStorageVault.java | 5 +- .../org/apache/doris/catalog/S3Resource.java | 5 +- .../apache/doris/common/util/BrokerUtil.java | 6 +- .../doris/common/util/LocationPath.java | 2 +- .../datasource/ExternalMetaCacheMgr.java | 2 +- .../doris/datasource/hive/AcidUtil.java | 4 +- .../datasource/hive/HMSExternalCatalog.java | 6 +- .../datasource/hive/HMSExternalTable.java | 2 +- .../doris/datasource/hive/HMSTransaction.java | 12 +- .../datasource/hive/HiveMetaStoreCache.java | 16 +- .../doris/datasource/hive/HiveUtil.java | 4 +- .../datasource/hive/source/HiveScanNode.java | 2 +- .../datasource/hudi/source/HudiScanNode.java | 2 +- .../property/storage/AzureProperties.java | 16 +- .../property/storage/S3Properties.java | 2 +- 
.../org/apache/doris/fs/FileSystemCache.java | 5 +- .../apache/doris/fs/FileSystemFactory.java | 100 ++- .../doris/fs/FileSystemProviderImpl.java | 2 +- .../org/apache/doris/fs/FileSystemType.java | 23 +- .../apache/doris/fs/PersistentFileSystem.java | 11 +- .../doris/{fsv2 => fs}/StorageTypeMapper.java | 10 +- ...ransactionScopeCachingDirectoryLister.java | 11 +- ...ionScopeCachingDirectoryListerFactory.java | 6 +- .../apache/doris/fs/obj/AzureObjStorage.java | 105 ++- .../org/apache/doris/fs/obj/ObjStorage.java | 2 +- .../org/apache/doris/fs/obj/S3ObjStorage.java | 114 ++- .../doris/fs/remote/AzureFileSystem.java | 19 +- .../doris/fs/remote/BrokerFileSystem.java | 3 + .../doris/fs/remote/RemoteFileSystem.java | 9 +- .../apache/doris/fs/remote/S3FileSystem.java | 145 +--- .../doris/fs/remote/SwitchingFileSystem.java | 12 +- .../doris/fs/remote/dfs/DFSFileSystem.java | 29 +- .../doris/fs/remote/dfs/JFSFileSystem.java | 7 +- .../doris/fs/remote/dfs/OFSFileSystem.java | 7 +- .../remote/dfs/OSSHdfsFileSystem.java | 2 +- .../apache/doris/fsv2/DirectoryLister.java | 29 - .../org/apache/doris/fsv2/FileSystem.java | 97 --- .../apache/doris/fsv2/FileSystemCache.java | 117 --- .../doris/fsv2/FileSystemDirectoryLister.java | 37 - .../apache/doris/fsv2/FileSystemFactory.java | 82 -- .../doris/fsv2/FileSystemIOException.java | 65 -- .../apache/doris/fsv2/FileSystemProvider.java | 24 - .../doris/fsv2/FileSystemProviderImpl.java | 43 -- .../org/apache/doris/fsv2/FileSystemType.java | 49 -- .../org/apache/doris/fsv2/FileSystemUtil.java | 70 -- .../apache/doris/fsv2/LocalDfsFileSystem.java | 199 ----- .../doris/fsv2/PersistentFileSystem.java | 95 --- .../doris/fsv2/RemoteFileRemoteIterator.java | 47 -- .../org/apache/doris/fsv2/RemoteFiles.java | 35 - .../org/apache/doris/fsv2/RemoteIterator.java | 27 - .../doris/fsv2/SimpleRemoteIterator.java | 45 -- .../TransactionDirectoryListingCacheKey.java | 64 -- ...ransactionScopeCachingDirectoryLister.java | 219 ------ 
...ionScopeCachingDirectoryListerFactory.java | 59 -- .../doris/fsv2/obj/AzureObjStorage.java | 381 ---------- .../org/apache/doris/fsv2/obj/ObjStorage.java | 69 -- .../apache/doris/fsv2/obj/RemoteObject.java | 54 -- .../apache/doris/fsv2/obj/RemoteObjects.java | 52 -- .../apache/doris/fsv2/obj/S3ObjStorage.java | 493 ------------ .../doris/fsv2/remote/AzureFileSystem.java | 48 -- .../doris/fsv2/remote/BrokerFileSystem.java | 704 ------------------ .../doris/fsv2/remote/ObjFileSystem.java | 165 ---- .../fsv2/remote/RemoteFSPhantomManager.java | 126 ---- .../apache/doris/fsv2/remote/RemoteFile.java | 106 --- .../doris/fsv2/remote/RemoteFileSystem.java | 141 ---- .../RemoteFileSystemPhantomReference.java | 44 -- .../doris/fsv2/remote/S3FileSystem.java | 106 --- .../fsv2/remote/SwitchingFileSystem.java | 132 ---- .../doris/fsv2/remote/dfs/DFSFileSystem.java | 500 ------------- .../doris/fsv2/remote/dfs/JFSFileSystem.java | 27 - .../doris/fsv2/remote/dfs/OFSFileSystem.java | 27 - .../translator/PhysicalPlanTranslator.java | 6 +- .../transaction/HiveTransactionManager.java | 2 +- .../TransactionManagerFactory.java | 2 +- .../apache/doris/backup/BackupJobTest.java | 4 +- .../apache/doris/backup/RepositoryTest.java | 30 +- .../apache/doris/backup/RestoreJobTest.java | 4 +- .../doris/common/util/LocationPathTest.java | 2 +- .../doris/datasource/hive/HiveAcidTest.java | 2 +- .../doris/datasource/hive/HmsCommitTest.java | 8 +- .../iceberg/IcebergHadoopCatalogTest.java | 3 +- .../doris/fs/obj/AzureObjStorageTest.java | 16 +- .../{fsv2 => fs}/obj/S3FileSystemTest.java | 12 +- .../fs/obj/S3ObjStorageGlobListTest.java | 5 +- .../apache/doris/fs/obj/S3ObjStorageTest.java | 309 ++++---- .../remote/RemoteFileSystemTest.java | 2 +- .../doris/fsv2/obj/S3ObjStorageTest.java | 209 ------ 92 files changed, 579 insertions(+), 5417 deletions(-) rename fe/fe-core/src/main/java/org/apache/doris/{fsv2 => fs}/StorageTypeMapper.java (91%) rename fe/fe-core/src/main/java/org/apache/doris/{fsv2 
=> fs}/remote/dfs/OSSHdfsFileSystem.java (96%) delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/DirectoryLister.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemCache.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemDirectoryLister.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemFactory.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemIOException.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProvider.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProviderImpl.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemType.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemUtil.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/LocalDfsFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/PersistentFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFileRemoteIterator.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFiles.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteIterator.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/SimpleRemoteIterator.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionDirectoryListingCacheKey.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryLister.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryListerFactory.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/AzureObjStorage.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/ObjStorage.java delete mode 
100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObject.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObjects.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/S3ObjStorage.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/AzureFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/BrokerFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/ObjFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFSPhantomManager.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFile.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystemPhantomReference.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/S3FileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/SwitchingFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/DFSFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/JFSFileSystem.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/OFSFileSystem.java rename fe/fe-core/src/test/java/org/apache/doris/{fsv2 => fs}/obj/S3FileSystemTest.java (97%) rename fe/fe-core/src/test/java/org/apache/doris/{fsv2 => fs}/remote/RemoteFileSystemTest.java (99%) delete mode 100644 fe/fe-core/src/test/java/org/apache/doris/fsv2/obj/S3ObjStorageTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java index 02b014fd26dab0..e1331d835613e6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java @@ -30,7 +30,7 @@ import org.apache.doris.common.util.PrintableMap; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.property.storage.ObjectStorageProperties; -import org.apache.doris.fsv2.FileSystemFactory; +import org.apache.doris.fs.FileSystemFactory; import org.apache.doris.load.EtlJobType; import org.apache.doris.load.loadv2.LoadTask; import org.apache.doris.mysql.privilege.PrivPredicate; diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java index f6bae592f35be0..294d9b3bed5d18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -44,17 +44,16 @@ import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.Pair; -import org.apache.doris.common.UserException; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.DebugPointUtil; import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.common.util.Util; import org.apache.doris.datasource.property.storage.StorageProperties; -import org.apache.doris.fsv2.FileSystemFactory; -import org.apache.doris.fsv2.remote.AzureFileSystem; -import org.apache.doris.fsv2.remote.RemoteFileSystem; -import org.apache.doris.fsv2.remote.S3FileSystem; +import org.apache.doris.fs.FileSystemFactory; +import org.apache.doris.fs.remote.AzureFileSystem; +import org.apache.doris.fs.remote.RemoteFileSystem; +import org.apache.doris.fs.remote.S3FileSystem; import org.apache.doris.nereids.trees.plans.commands.CancelBackupCommand; import org.apache.doris.nereids.trees.plans.commands.CreateRepositoryCommand; import org.apache.doris.persist.BarrierLog; @@ -218,17 +217,10 @@ public void 
createRepository(CreateRepositoryCommand command) throws DdlExceptio } RemoteFileSystem fileSystem; - try { - fileSystem = FileSystemFactory.get(command.getStorageType(), command.getProperties()); - } catch (UserException e) { - throw new DdlException("Failed to initialize remote file system: " + e.getMessage()); - } - org.apache.doris.fs.remote.RemoteFileSystem oldfs = org.apache.doris.fs.FileSystemFactory - .get(command.getBrokerName(), command.getStorageType(), - command.getProperties()); + fileSystem = FileSystemFactory.get(command.getStorageType(), command.getBrokerName(), command.getProperties()); long repoId = env.getNextId(); Repository repo = new Repository(repoId, command.getName(), command.isReadOnly(), command.getLocation(), - fileSystem, oldfs); + fileSystem); Status st = repoMgr.addAndInitRepoIfNotExist(repo, false); if (!st.ok()) { @@ -250,17 +242,10 @@ public void createRepository(CreateRepositoryStmt stmt) throws DdlException { } RemoteFileSystem fileSystem; - try { - fileSystem = FileSystemFactory.get(stmt.getStorageType(), stmt.getProperties()); - } catch (UserException e) { - throw new DdlException("Failed to initialize remote file system: " + e.getMessage()); - } - org.apache.doris.fs.remote.RemoteFileSystem oldfs = org.apache.doris.fs.FileSystemFactory - .get(stmt.getBrokerName(), stmt.getStorageType(), - stmt.getProperties()); + fileSystem = FileSystemFactory.get(stmt.getStorageType(), stmt.getBrokerName(), stmt.getProperties()); long repoId = env.getNextId(); Repository repo = new Repository(repoId, stmt.getName(), stmt.isReadOnly(), stmt.getLocation(), - fileSystem, oldfs); + fileSystem); Status st = repoMgr.addAndInitRepoIfNotExist(repo, false); if (!st.ok()) { @@ -295,18 +280,10 @@ public void alterRepository(String repoName, Map newProps, boole Map mergedProps = mergeProperties(oldRepo, newProps, strictCheck); // Create new remote file system with merged properties RemoteFileSystem fileSystem = 
FileSystemFactory.get(StorageProperties.createPrimary(mergedProps)); - org.apache.doris.fs.remote.RemoteFileSystem oldfs = null; - if (oldRepo.getRemoteFileSystem() instanceof S3FileSystem) { - oldfs = org.apache.doris.fs.FileSystemFactory.get(oldRepo.getRemoteFileSystem().getName(), - StorageBackend.StorageType.S3, mergedProps); - } else if (oldRepo.getRemoteFileSystem() instanceof AzureFileSystem) { - oldfs = org.apache.doris.fs.FileSystemFactory.get(oldRepo.getRemoteFileSystem().getName(), - StorageBackend.StorageType.AZURE, mergedProps); - } // Create new Repository instance with updated file system Repository newRepo = new Repository( oldRepo.getId(), oldRepo.getName(), oldRepo.isReadOnly(), - oldRepo.getLocation(), fileSystem, oldfs + oldRepo.getLocation(), fileSystem ); // Verify the repository can be connected with new settings if (!newRepo.ping()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java b/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java index 5154c74ab58294..12dec75c8bb8b1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java @@ -32,12 +32,12 @@ import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.property.constants.S3Properties; import org.apache.doris.datasource.property.storage.StorageProperties; -import org.apache.doris.fsv2.FileSystemFactory; -import org.apache.doris.fsv2.PersistentFileSystem; -import org.apache.doris.fsv2.remote.BrokerFileSystem; -import org.apache.doris.fsv2.remote.RemoteFile; -import org.apache.doris.fsv2.remote.RemoteFileSystem; -import org.apache.doris.fsv2.remote.S3FileSystem; +import org.apache.doris.fs.FileSystemFactory; +import org.apache.doris.fs.PersistentFileSystem; +import org.apache.doris.fs.remote.BrokerFileSystem; +import org.apache.doris.fs.remote.RemoteFile; +import org.apache.doris.fs.remote.RemoteFileSystem; +import 
org.apache.doris.fs.remote.S3FileSystem; import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.system.Backend; @@ -132,28 +132,22 @@ public class Repository implements Writable, GsonPostProcessable { private String location; @SerializedName("fs") - private org.apache.doris.fs.PersistentFileSystem oldfs; - - // Temporary field: currently still using the legacy fs config (oldfs). - // This field can be removed once the new fs configuration is fully enabled. private PersistentFileSystem fileSystem; - public org.apache.doris.fs.PersistentFileSystem getOldfs() { - return oldfs; + public PersistentFileSystem getFileSystem() { + return fileSystem; } private Repository() { // for persist } - public Repository(long id, String name, boolean isReadOnly, String location, RemoteFileSystem fileSystem, - org.apache.doris.fs.PersistentFileSystem oldFs) { + public Repository(long id, String name, boolean isReadOnly, String location, RemoteFileSystem fileSystem) { this.id = id; this.name = name; this.isReadOnly = isReadOnly; this.location = location; this.fileSystem = fileSystem; - this.oldfs = oldFs; this.createTime = System.currentTimeMillis(); } @@ -248,16 +242,8 @@ public Status alterRepositoryS3Properties(Map properties) { @Override public void gsonPostProcess() { StorageBackend.StorageType type = StorageBackend.StorageType.BROKER; - if (this.oldfs.properties.containsKey(org.apache.doris.fs.PersistentFileSystem.STORAGE_TYPE)) { - type = StorageBackend.StorageType.valueOf( - this.oldfs.properties.get(org.apache.doris.fs.PersistentFileSystem.STORAGE_TYPE)); - this.oldfs.properties.remove(org.apache.doris.fs.PersistentFileSystem.STORAGE_TYPE); - } - this.oldfs = org.apache.doris.fs.FileSystemFactory.get(this.oldfs.getName(), - type, - this.oldfs.getProperties()); if (!type.equals(StorageBackend.StorageType.BROKER)) { - StorageProperties storageProperties = 
StorageProperties.createPrimary(this.oldfs.properties); + StorageProperties storageProperties = StorageProperties.createPrimary(this.fileSystem.properties); this.fileSystem = FileSystemFactory.get(storageProperties); } } @@ -867,13 +853,7 @@ public void readFields(DataInput in) throws IOException { name = Text.readString(in); isReadOnly = in.readBoolean(); location = Text.readString(in); - oldfs = org.apache.doris.fs.PersistentFileSystem.read(in); - try { - fileSystem = FileSystemFactory.get(oldfs.getStorageType(), oldfs.getProperties()); - } catch (UserException e) { - // do we ignore this exception? - throw new IOException("Failed to create file system: " + e.getMessage()); - } + fileSystem = PersistentFileSystem.read(in); createTime = in.readLong(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RepositoryMgr.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RepositoryMgr.java index d57593c5443098..ee183478e9b4ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RepositoryMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RepositoryMgr.java @@ -23,8 +23,8 @@ import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.Daemon; -import org.apache.doris.fsv2.remote.AzureFileSystem; -import org.apache.doris.fsv2.remote.S3FileSystem; +import org.apache.doris.fs.remote.AzureFileSystem; +import org.apache.doris.fs.remote.S3FileSystem; import org.apache.doris.persist.gson.GsonPostProcessable; import org.apache.doris.persist.gson.GsonUtils; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AzureResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AzureResource.java index c24c26ee22c89e..2a6c39f2c4452d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AzureResource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AzureResource.java @@ -22,6 +22,8 @@ import org.apache.doris.common.proc.BaseProcResult; import 
org.apache.doris.common.util.PrintableMap; import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.datasource.property.storage.AzureProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.obj.AzureObjStorage; import org.apache.doris.fs.obj.ObjStorage; import org.apache.doris.fs.obj.RemoteObjects; @@ -90,7 +92,8 @@ protected static void pingAzure(String bucketName, String rootPath, byte[] contentData = new byte[2 * ObjStorage.CHUNK_SIZE]; Arrays.fill(contentData, (byte) 'A'); - AzureObjStorage azureObjStorage = new AzureObjStorage(newProperties); + AzureProperties azureProperties = (AzureProperties) StorageProperties.createPrimary(newProperties); + AzureObjStorage azureObjStorage = new AzureObjStorage(azureProperties); Status status = azureObjStorage.putObject(testObj, new ByteArrayInputStream(contentData), contentData.length); if (!Status.OK.equals(status)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsStorageVault.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsStorageVault.java index 3b6aae6c7cf6b6..8476c9f4a5ef6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsStorageVault.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsStorageVault.java @@ -23,6 +23,8 @@ import org.apache.doris.common.security.authentication.AuthenticationConfig; import org.apache.doris.common.util.PrintableMap; import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.remote.dfs.DFSFileSystem; import com.google.common.base.Preconditions; @@ -130,7 +132,8 @@ public static void checkConnectivity(Map newProperties) throws D Preconditions.checkArgument( !Strings.isNullOrEmpty(pathPrefix), "%s is null or empty", PropertyKey.VAULT_PATH_PREFIX); - try 
(DFSFileSystem dfsFileSystem = new DFSFileSystem(newProperties)) { + try (DFSFileSystem dfsFileSystem = new DFSFileSystem((HdfsCompatibleProperties) StorageProperties + .createPrimary(newProperties))) { Long timestamp = System.currentTimeMillis(); String remotePath = hadoopFsName + "/" + pathPrefix + "/doris-check-connectivity" + timestamp.toString(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java index 22df2c9082f5cd..35219dbe3c522a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java @@ -23,6 +23,8 @@ import org.apache.doris.common.proc.BaseProcResult; import org.apache.doris.common.util.PrintableMap; import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.obj.ObjStorage; import org.apache.doris.fs.obj.RemoteObjects; import org.apache.doris.fs.obj.S3ObjStorage; @@ -126,7 +128,8 @@ protected static void pingS3(String bucketName, String rootPath, Map origProps) { @@ -121,7 +123,7 @@ public static boolean guessIsMe(Map origProps) { .findFirst() .orElse(null); if (!Strings.isNullOrEmpty(value)) { - return value.endsWith("blob.core.windows.net"); + return value.endsWith(AZURE_ENDPOINT_SUFFIX); } return false; } @@ -139,6 +141,18 @@ public Map getBackendConfigProperties() { return s3Props; } + public static final String AZURE_ENDPOINT_TEMPLATE = "https://%s.blob.core.windows.net"; + + private String formatAzureEndpoint(String endpoint) { + if (Config.force_azure_blob_global_endpoint) { + return String.format(AZURE_ENDPOINT_TEMPLATE, accessKey); + } + if (endpoint.contains("://")) { + return endpoint; + } + return "https://" + endpoint; + } + @Override public String 
validateAndNormalizeUri(String url) throws UserException { return S3PropertyUtils.validateAndNormalizeUri(url, usePathStyle, forceParsingByStandardUrl); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java index 4305a04ebc8e50..08a5a2a1574a92 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3Properties.java @@ -142,7 +142,7 @@ protected void initNormalizeAndCheckProps() { if (StringUtils.isNotBlank(s3ExternalId) && StringUtils.isNotBlank(s3IAMRole)) { return; } - throw new StoragePropertiesException("Please set s3.access_key and s3.secret_key or s3.iam_role and " + throw new StoragePropertiesException("Please set s3.access_key and s3.secret_key or s3.role_arn and " + "s3.external_id"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java index e96258dc719fbd..017300921821c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java @@ -20,6 +20,7 @@ import org.apache.doris.common.CacheFactory; import org.apache.doris.common.Config; import org.apache.doris.common.Pair; +import org.apache.doris.common.UserException; import org.apache.doris.fs.remote.RemoteFileSystem; import com.github.benmanes.caffeine.cache.LoadingCache; @@ -45,8 +46,8 @@ public FileSystemCache() { fileSystemCache = fsCacheFactory.buildCache(this::loadFileSystem); } - private RemoteFileSystem loadFileSystem(FileSystemCacheKey key) { - return FileSystemFactory.getRemoteFileSystem(key.type, key.getFsProperties(), key.bindBrokerName); + private RemoteFileSystem loadFileSystem(FileSystemCacheKey key) throws UserException { + return 
FileSystemFactory.get(key.type, key.getFsProperties(), key.bindBrokerName); } public RemoteFileSystem getRemoteFileSystem(FileSystemCacheKey key) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java index fb23005f4ac9ac..9d00700f894044 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java @@ -17,74 +17,66 @@ package org.apache.doris.fs; +import org.apache.doris.analysis.BrokerDesc; import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.datasource.property.constants.AzureProperties; -import org.apache.doris.fs.remote.AzureFileSystem; +import org.apache.doris.common.UserException; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.remote.BrokerFileSystem; import org.apache.doris.fs.remote.RemoteFileSystem; -import org.apache.doris.fs.remote.S3FileSystem; -import org.apache.doris.fs.remote.dfs.DFSFileSystem; -import org.apache.doris.fs.remote.dfs.JFSFileSystem; -import org.apache.doris.fs.remote.dfs.OFSFileSystem; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; - -import java.io.IOException; +import java.util.List; import java.util.Map; public class FileSystemFactory { - public static RemoteFileSystem get(String name, StorageBackend.StorageType type, Map properties) { - // TODO: rename StorageBackend.StorageType - if (type == StorageBackend.StorageType.S3) { - if (AzureProperties.checkAzureProviderPropertyExist(properties)) { - return new AzureFileSystem(properties); - } - return new S3FileSystem(properties); - } else if (type == StorageBackend.StorageType.HDFS || type == StorageBackend.StorageType.GFS) { - return new DFSFileSystem(properties); - } else if (type == StorageBackend.StorageType.OFS) { - return new OFSFileSystem(properties); - } else if (type == 
StorageBackend.StorageType.JFS) { - return new JFSFileSystem(properties); - } else if (type == StorageBackend.StorageType.BROKER) { - return new BrokerFileSystem(name, properties); - } else { - throw new UnsupportedOperationException(type.toString() + "backend is not implemented"); - } + public static RemoteFileSystem get(Map properties) throws UserException { + StorageProperties storageProperties = StorageProperties.createPrimary(properties); + return get(storageProperties); } - public static RemoteFileSystem getRemoteFileSystem(FileSystemType type, Map properties, - String bindBrokerName) { - switch (type) { - case S3: - if (AzureProperties.checkAzureProviderPropertyExist(properties)) { - return new AzureFileSystem(properties); - } - return new S3FileSystem(properties); - case FILE: - case DFS: - return new DFSFileSystem(properties); - case OFS: - return new OFSFileSystem(properties); - case JFS: - return new JFSFileSystem(properties); - case BROKER: - return new BrokerFileSystem(bindBrokerName, properties); - case AZURE: - return new AzureFileSystem(properties); - default: - throw new IllegalStateException("Not supported file system type: " + type); + public static RemoteFileSystem get(StorageBackend.StorageType storageType, String bindBreakName, + Map properties) { + if (storageType.equals(StorageBackend.StorageType.BROKER)) { + return new BrokerFileSystem(bindBreakName, properties); } + StorageProperties storageProperties = StorageProperties.createPrimary(properties); + return get(storageProperties); } - public static RemoteFileSystem getS3FileSystem(Map properties) { - // use for test - return get(StorageBackend.StorageType.S3.name(), StorageBackend.StorageType.S3, properties); + public static RemoteFileSystem get(StorageProperties storageProperties) { + return StorageTypeMapper.create(storageProperties); } - public static org.apache.hadoop.fs.FileSystem getNativeByPath(Path path, Configuration conf) throws IOException { - return path.getFileSystem(conf); + 
// This method is a temporary workaround for handling properties. + // It will be removed when broker properties are officially supported. + public static RemoteFileSystem get(String name, Map properties) { + return new BrokerFileSystem(name, properties); + } + + public static RemoteFileSystem get(FileSystemType fileSystemType, Map properties, + String bindBrokerName) + throws UserException { + if (fileSystemType == FileSystemType.BROKER) { + return new BrokerFileSystem(bindBrokerName, properties); + } + List storagePropertiesList = StorageProperties.createAll(properties); + + for (StorageProperties storageProperties : storagePropertiesList) { + if (storageProperties.getStorageName().equalsIgnoreCase(fileSystemType.name())) { + return StorageTypeMapper.create(storageProperties); + } + } + throw new RuntimeException("Unsupported file system type: " + fileSystemType); + } + + public static RemoteFileSystem get(BrokerDesc brokerDesc) { + if (null != brokerDesc.getStorageProperties()) { + return get(brokerDesc.getStorageProperties()); + } + if (null != brokerDesc.getStorageType() + && brokerDesc.getStorageType().equals(StorageBackend.StorageType.BROKER)) { + return new BrokerFileSystem(brokerDesc.getName(), brokerDesc.getProperties()); + } + throw new RuntimeException("Unexpected storage type: " + brokerDesc.getStorageType()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemProviderImpl.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemProviderImpl.java index 680592ab4a8719..e46121ddc94a17 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemProviderImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemProviderImpl.java @@ -30,7 +30,7 @@ public class FileSystemProviderImpl implements FileSystemProvider { private Map properties; public FileSystemProviderImpl(ExternalMetaCacheMgr extMetaCacheMgr, String bindBrokerName, - Map properties) { + Map properties) { this.extMetaCacheMgr = extMetaCacheMgr; 
this.bindBrokerName = bindBrokerName; this.properties = properties; diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemType.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemType.java index 967b5f82dd7a51..ffd94619e61a88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemType.java @@ -17,9 +17,30 @@ package org.apache.doris.fs; +// TODO: [FileSystemType Unification] +// There are currently multiple definitions of file system types across the codebase, including but not limited to: +// 1. Backend module (e.g., FileSystemBackendType) +// 2. Location/path parsing logic (e.g., LocationType or string-based tags) +// 3. This enum: FileSystemType (used in the SPI/plugin layer) +// +// Problem: +// - File system type definitions are scattered across different modules with inconsistent naming and granularity +// - Adding a new type requires changes in multiple places, increasing risk of bugs and maintenance overhead +// - Difficult to maintain and error-prone +// +// Refactoring Goal: +// - Consolidate file system type definitions into a single source of truth +// - Clearly define the semantics and usage of each type (e.g., remote vs local, object storage vs file system) +// - All modules should reference the unified definition to avoid duplication and hardcoded strings +// +// Suggested Approach: +// - Create a centralized `FsType` enum/class as the canonical definition +// - Provide mapping or adapter methods where needed (e.g., map LocationType to FsType) +// - Gradually deprecate other definitions and annotate them with @Deprecated, including migration instructions +// public enum FileSystemType { S3, - DFS, + HDFS, OFS, JFS, BROKER, diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java index 9cb156036d9304..f94b9a3d5c5d8c 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java @@ -19,10 +19,12 @@ import org.apache.doris.analysis.StorageBackend; import org.apache.doris.common.io.Text; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.persist.gson.GsonPreProcessable; import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; +import lombok.Getter; import java.io.DataInput; import java.io.IOException; @@ -39,11 +41,8 @@ public abstract class PersistentFileSystem implements FileSystem, GsonPreProcess public String name; public StorageBackend.StorageType type; - public boolean needFullPath() { - return type == StorageBackend.StorageType.S3 - || type == StorageBackend.StorageType.OFS - || type == StorageBackend.StorageType.JFS; - } + @Getter + protected StorageProperties storageProperties; public PersistentFileSystem(String name, StorageBackend.StorageType type) { this.name = name; @@ -82,7 +81,7 @@ public static PersistentFileSystem read(DataInput in) throws IOException { type = StorageBackend.StorageType.valueOf(properties.get(STORAGE_TYPE)); properties.remove(STORAGE_TYPE); } - return FileSystemFactory.get(name, type, properties); + return FileSystemFactory.get(type, name, properties); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/StorageTypeMapper.java b/fe/fe-core/src/main/java/org/apache/doris/fs/StorageTypeMapper.java similarity index 91% rename from fe/fe-core/src/main/java/org/apache/doris/fsv2/StorageTypeMapper.java rename to fe/fe-core/src/main/java/org/apache/doris/fs/StorageTypeMapper.java index 5683934d9606d9..2ac1964bdbbb40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/StorageTypeMapper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/StorageTypeMapper.java @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-package org.apache.doris.fsv2; +package org.apache.doris.fs; import org.apache.doris.datasource.property.storage.AzureProperties; import org.apache.doris.datasource.property.storage.COSProperties; @@ -26,10 +26,10 @@ import org.apache.doris.datasource.property.storage.OSSProperties; import org.apache.doris.datasource.property.storage.S3Properties; import org.apache.doris.datasource.property.storage.StorageProperties; -import org.apache.doris.fsv2.remote.AzureFileSystem; -import org.apache.doris.fsv2.remote.RemoteFileSystem; -import org.apache.doris.fsv2.remote.S3FileSystem; -import org.apache.doris.fsv2.remote.dfs.DFSFileSystem; +import org.apache.doris.fs.remote.AzureFileSystem; +import org.apache.doris.fs.remote.RemoteFileSystem; +import org.apache.doris.fs.remote.S3FileSystem; +import org.apache.doris.fs.remote.dfs.DFSFileSystem; import java.util.Arrays; import java.util.function.Function; diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryLister.java b/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryLister.java index 37acec6864f8f5..31a21275a16fe9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryLister.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryLister.java @@ -60,7 +60,8 @@ public Cache getCache( private final DirectoryLister delegate; public TransactionScopeCachingDirectoryLister(DirectoryLister delegate, long transactionId, - Cache cache) { + Cache cache) { this.delegate = Objects.requireNonNull(delegate, "delegate is null"); this.transactionId = transactionId; this.cache = Objects.requireNonNull(cache, "cache is null"); @@ -73,7 +74,8 @@ public RemoteIterator listFiles(FileSystem fs, boolean recursive, Ta } private RemoteIterator listInternal(FileSystem fs, boolean recursive, TableIf table, - TransactionDirectoryListingCacheKey cacheKey) throws FileSystemIOException { + TransactionDirectoryListingCacheKey 
cacheKey) + throws FileSystemIOException { FetchingValueHolder cachedValueHolder; try { cachedValueHolder = cache.get(cacheKey, @@ -93,14 +95,15 @@ private RemoteIterator listInternal(FileSystem fs, boolean recursive } private RemoteIterator createListingRemoteIterator(FileSystem fs, boolean recursive, - TableIf table, TransactionDirectoryListingCacheKey cacheKey) + TableIf table, + TransactionDirectoryListingCacheKey cacheKey) throws FileSystemIOException { return delegate.listFiles(fs, recursive, table, cacheKey.getPath()); } private RemoteIterator cachingRemoteIterator(FetchingValueHolder cachedValueHolder, - TransactionDirectoryListingCacheKey cacheKey) { + TransactionDirectoryListingCacheKey cacheKey) { return new RemoteIterator() { private int fileIndex; diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryListerFactory.java b/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryListerFactory.java index c3c9c347c3d2b6..f75e68e89042da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryListerFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/TransactionScopeCachingDirectoryListerFactory.java @@ -41,9 +41,9 @@ public TransactionScopeCachingDirectoryListerFactory(long maxSize) { if (maxSize > 0) { EvictableCacheBuilder cacheBuilder = EvictableCacheBuilder.newBuilder() - .maximumWeight(maxSize) - .weigher((key, value) -> - Math.toIntExact(value.getCacheFileCount())); + .maximumWeight(maxSize) + .weigher((key, value) -> + Math.toIntExact(value.getCacheFileCount())); this.cache = Optional.of(cacheBuilder.build()); } else { cache = Optional.empty(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java index 70c4e754eeb230..0c73b1b3ef5a46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java @@ -22,9 +22,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.util.S3URI; import org.apache.doris.common.util.S3Util; -import org.apache.doris.datasource.property.PropertyConverter; -import org.apache.doris.datasource.property.constants.AzureProperties; -import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.datasource.property.storage.AzureProperties; import org.apache.doris.fs.remote.RemoteFile; import com.azure.core.http.rest.PagedIterable; @@ -61,21 +59,22 @@ import java.util.Base64; import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.TreeMap; import java.util.UUID; public class AzureObjStorage implements ObjStorage { private static final Logger LOG = LogManager.getLogger(AzureObjStorage.class); - protected Map properties; + private static final String URI_TEMPLATE = "https://%s.blob.core.windows.net"; + + protected AzureProperties azureProperties; private BlobServiceClient client; - private boolean isUsePathStyle = false; + private boolean isUsePathStyle; - private boolean forceParsingByStandardUri = false; + private boolean forceParsingByStandardUri; - public AzureObjStorage(Map properties) { - this.properties = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - setProperties(properties); + public AzureObjStorage(AzureProperties azureProperties) { + this.azureProperties = azureProperties; + this.isUsePathStyle = Boolean.parseBoolean(azureProperties.getUsePathStyle()); + this.forceParsingByStandardUri = Boolean.parseBoolean(azureProperties.getForceParsingByStandardUrl()); } // To ensure compatibility with S3 usage, the path passed by the user still starts with 'S3://${containerName}'. 
@@ -91,44 +90,15 @@ private static String removeUselessSchema(String remotePath) { return remotePath.substring(firstSlashIndex + 1); } - public Map getProperties() { - return properties; - } - - protected void setProperties(Map properties) { - this.properties.putAll(properties); - try { - S3Properties.requiredS3Properties(this.properties); - } catch (DdlException e) { - throw new IllegalArgumentException(e); - } - // Virtual hosted-style is recommended in the s3 protocol. - // The path-style has been abandoned, but for some unexplainable reasons, - // the s3 client will determine whether the endpoint starts with `s3` - // when generating a virtual hosted-sytle request. - // If not, it will not be converted ( https://github.com/aws/aws-sdk-java-v2/pull/763), - // but the endpoints of many cloud service providers for object storage do not start with s3, - // so they cannot be converted to virtual hosted-sytle. - // Some of them, such as aliyun's oss, only support virtual hosted-style, - // and some of them(ceph) may only support - // path-style, so we need to do some additional conversion. 
- isUsePathStyle = this.properties.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false") - .equalsIgnoreCase("true"); - forceParsingByStandardUri = this.properties.getOrDefault(PropertyConverter.FORCE_PARSING_BY_STANDARD_URI, - "false").equalsIgnoreCase("true"); - } @Override public BlobServiceClient getClient() throws UserException { if (client == null) { - final String accountName = properties.get(S3Properties.ACCESS_KEY); - final String endpoint = AzureProperties.formatAzureEndpoint( - properties.get(S3Properties.ENDPOINT), accountName); - StorageSharedKeyCredential cred = new StorageSharedKeyCredential(accountName, - properties.get(S3Properties.SECRET_KEY)); + StorageSharedKeyCredential cred = new StorageSharedKeyCredential(azureProperties.getAccessKey(), + azureProperties.getSecretKey()); BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); builder.credential(cred); - builder.endpoint(endpoint); + builder.endpoint(azureProperties.getEndpoint()); client = builder.buildClient(); } return client; @@ -144,8 +114,10 @@ public Status headObject(String remotePath) { try { S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); BlobClient blobClient = getClient().getBlobContainerClient(uri.getBucket()).getBlobClient(uri.getKey()); - LOG.info("headObject remotePath:{} bucket:{} key:{} properties:{}", - remotePath, uri.getBucket(), uri.getKey(), blobClient.getProperties()); + if (LOG.isDebugEnabled()) { + LOG.debug("headObject remotePath:{} bucket:{} key:{} properties:{}", + remotePath, uri.getBucket(), uri.getKey(), blobClient.getProperties()); + } return Status.OK; } catch (BlobStorageException e) { if (e.getStatusCode() == HttpStatus.SC_NOT_FOUND) { @@ -167,15 +139,15 @@ public Status getObject(String remoteFilePath, File localFile) { S3URI uri = S3URI.create(remoteFilePath, isUsePathStyle, forceParsingByStandardUri); BlobClient blobClient = getClient().getBlobContainerClient(uri.getBucket()).getBlobClient(uri.getKey()); 
BlobProperties properties = blobClient.downloadToFile(localFile.getAbsolutePath()); - LOG.info("get file " + remoteFilePath + " success: " + properties.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("get file {} success, properties: {}", remoteFilePath, properties); + } return Status.OK; } catch (BlobStorageException e) { - LOG.warn("{} getObject exception:", remoteFilePath, e); return new Status( Status.ErrCode.COMMON_ERROR, "get file from azure error: " + e.getServiceMessage()); } catch (UserException e) { - LOG.warn("{} getObject exception:", remoteFilePath, e); return new Status(Status.ErrCode.COMMON_ERROR, "getObject " + remoteFilePath + " failed: " + e.getMessage()); } @@ -189,12 +161,10 @@ public Status putObject(String remotePath, @Nullable InputStream content, long c blobClient.upload(content, contentLength); return Status.OK; } catch (BlobStorageException e) { - LOG.warn("{} putObject exception:", remotePath, e); return new Status( Status.ErrCode.COMMON_ERROR, "Error occurred while copying the blob:: " + e.getServiceMessage()); } catch (UserException e) { - LOG.warn("{} putObject exception:", remotePath, e); return new Status(Status.ErrCode.COMMON_ERROR, "putObject " + remotePath + " failed: " + e.getMessage()); } @@ -206,7 +176,9 @@ public Status deleteObject(String remotePath) { S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); BlobClient blobClient = getClient().getBlobContainerClient(uri.getBucket()).getBlobClient(uri.getKey()); blobClient.delete(); - LOG.info("delete file " + remotePath + " success"); + if (LOG.isDebugEnabled()) { + LOG.debug("delete file {} success", remotePath); + } return Status.OK; } catch (BlobStorageException e) { if (e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) { @@ -250,7 +222,9 @@ public Status deleteObjects(String remotePath) { isTruncated = objects.isTruncated(); continuationToken = objects.getContinuationToken(); } while (isTruncated); - LOG.info("total delete {} objects for dir 
{}", totalObjects, remotePath); + if (LOG.isDebugEnabled()) { + LOG.debug("total delete {} objects for dir {}", totalObjects, remotePath); + } return Status.OK; } catch (BlobStorageException e) { return new Status(Status.ErrCode.COMMON_ERROR, "list objects for delete objects failed: " + e.getMessage()); @@ -270,7 +244,9 @@ public Status copyObject(String origFilePath, String destFilePath) { BlobClient destinationBlobClient = getClient().getBlobContainerClient(destUri.getBucket()) .getBlobClient(destUri.getKey()); destinationBlobClient.beginCopy(sourceBlobClient.getBlobUrl(), null); - LOG.info("Blob copied from " + origFilePath + " to " + destFilePath); + if (LOG.isDebugEnabled()) { + LOG.debug("Blob copy file from {} to {} success", origFilePath, destFilePath); + } return Status.OK; } catch (BlobStorageException e) { return new Status( @@ -310,9 +286,12 @@ public RemoteObjects listObjects(String remotePath, String continuationToken) th // Due to historical reasons, when the BE parses the object storage path. // It assumes the path starts with 'S3://${containerName}' // So here the path needs to be constructed in a format that BE can parse. 
- private String constructS3Path(String fileName, String bucket) throws UserException { - LOG.debug("the path is {}", String.format("s3://%s/%s", bucket, fileName)); - return String.format("s3://%s/%s", bucket, fileName); + private String constructS3Path(String fileName, String bucket) { + String path = String.format("s3://%s/%s", bucket, fileName); + if (LOG.isDebugEnabled()) { + LOG.debug("constructS3Path fileName:{}, bucket:{}, the path is {}", fileName, bucket, path); + } + return path; } public Status globList(String remotePath, List result, boolean fileNameOnly) { @@ -325,15 +304,21 @@ public Status globList(String remotePath, List result, boolean fileN S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); String globPath = uri.getKey(); String bucket = uri.getBucket(); - LOG.info("try to glob list for azure, remote path {}, orig {}", globPath, remotePath); + if (LOG.isDebugEnabled()) { + LOG.debug("try to glob list for azure, remote path {}, orig {}", globPath, remotePath); + } BlobContainerClient client = getClient().getBlobContainerClient(bucket); java.nio.file.Path pathPattern = Paths.get(globPath); - LOG.info("path pattern {}", pathPattern.toString()); - PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + pathPattern.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("azure glob list pathPattern: {}, bucket: {}", pathPattern, bucket); + } + PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + pathPattern); HashSet directorySet = new HashSet<>(); String listPrefix = S3Util.getLongestPrefix(globPath); - LOG.info("azure glob list prefix is {}", listPrefix); + if (LOG.isDebugEnabled()) { + LOG.debug("azure glob list prefix is {}", listPrefix); + } ListBlobsOptions options = new ListBlobsOptions().setPrefix(listPrefix); String newContinuationToken = null; do { diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java index f222dddbe83778..fceec0ca102b84 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/ObjStorage.java @@ -34,7 +34,7 @@ public interface ObjStorage { // CHUNK_SIZE for multi part upload - public static final int CHUNK_SIZE = 5 * 1024 * 1024; + int CHUNK_SIZE = 5 * 1024 * 1024; C getClient() throws UserException; diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java index 1a3679171f4a4a..7f03bfdc06e6ab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java @@ -22,8 +22,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.util.S3URI; import org.apache.doris.common.util.S3Util; -import org.apache.doris.datasource.property.PropertyConverter; -import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; import org.apache.doris.fs.remote.RemoteFile; import org.apache.commons.lang3.StringUtils; @@ -82,11 +81,13 @@ public class S3ObjStorage implements ObjStorage { protected Map properties; + protected AbstractS3CompatibleProperties s3Properties; + private boolean isUsePathStyle = false; private boolean forceParsingByStandardUri = false; - public S3ObjStorage(Map properties) { + public S3ObjStorage(AbstractS3CompatibleProperties properties) { this.properties = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); setProperties(properties); } @@ -95,50 +96,27 @@ public Map getProperties() { return properties; } - protected void setProperties(Map properties) { - this.properties.putAll(properties); - try { - S3Properties.requiredS3Properties(this.properties); - } catch (DdlException e) { - throw new IllegalArgumentException(e); 
- } - // Virtual hosted-style is recommended in the s3 protocol. - // The path-style has been abandoned, but for some unexplainable reasons, - // the s3 client will determine whether the endpiont starts with `s3` - // when generating a virtual hosted-sytle request. - // If not, it will not be converted ( https://github.com/aws/aws-sdk-java-v2/pull/763), - // but the endpoints of many cloud service providers for object storage do not start with s3, - // so they cannot be converted to virtual hosted-sytle. - // Some of them, such as aliyun's oss, only support virtual hosted-style, - // and some of them(ceph) may only support - // path-style, so we need to do some additional conversion. - isUsePathStyle = this.properties.getOrDefault(PropertyConverter.USE_PATH_STYLE, "false") - .equalsIgnoreCase("true"); - forceParsingByStandardUri = this.properties.getOrDefault(PropertyConverter.FORCE_PARSING_BY_STANDARD_URI, - "false").equalsIgnoreCase("true"); - - String endpoint = this.properties.get(S3Properties.ENDPOINT); - String region = this.properties.get(S3Properties.REGION); - - this.properties.put(S3Properties.REGION, PropertyConverter.checkRegion(endpoint, region, S3Properties.REGION)); + protected void setProperties(AbstractS3CompatibleProperties properties) { + this.s3Properties = properties; + isUsePathStyle = Boolean.parseBoolean(properties.getUsePathStyle()); + forceParsingByStandardUri = Boolean.parseBoolean(s3Properties.getForceParsingByStandardUrl()); } @Override public S3Client getClient() throws UserException { if (client == null) { - String endpointStr = properties.get(S3Properties.ENDPOINT); + String endpointStr = s3Properties.getEndpoint(); if (!endpointStr.contains("://")) { endpointStr = "http://" + endpointStr; } URI endpoint = URI.create(endpointStr); - client = S3Util.buildS3Client(endpoint, properties.get(S3Properties.REGION), isUsePathStyle, - properties.get(S3Properties.ACCESS_KEY), properties.get(S3Properties.SECRET_KEY), - 
properties.get(S3Properties.SESSION_TOKEN), properties.get(S3Properties.ROLE_ARN), - properties.get(S3Properties.EXTERNAL_ID)); + client = S3Util.buildS3Client(endpoint, s3Properties.getRegion(), + isUsePathStyle, s3Properties.getAwsCredentialsProvider()); } return client; } + @Override public Triple getStsToken() throws DdlException { return null; @@ -150,7 +128,9 @@ public Status headObject(String remotePath) { S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); HeadObjectResponse response = getClient() .headObject(HeadObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); - LOG.info("head file " + remotePath + " success: " + response.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("headObject success: {}, response: {}", remotePath, response); + } return Status.OK; } catch (S3Exception e) { if (e.statusCode() == HttpStatus.SC_NOT_FOUND) { @@ -171,7 +151,9 @@ public Status getObject(String remoteFilePath, File localFile) { S3URI uri = S3URI.create(remoteFilePath, isUsePathStyle, forceParsingByStandardUri); GetObjectResponse response = getClient().getObject( GetObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), localFile.toPath()); - LOG.info("get file " + remoteFilePath + " success: " + response.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("get file {} success: {}", remoteFilePath, response); + } return Status.OK; } catch (S3Exception s3Exception) { return new Status( @@ -195,13 +177,15 @@ public Status putObject(String remotePath, @Nullable InputStream content, long c .putObject( PutObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), body); - LOG.info("put object success: " + response.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("put object success: {}", response); + } return Status.OK; } catch (S3Exception e) { - LOG.error("put object failed:", e); + LOG.warn("put object failed: ", e); return new Status(Status.ErrCode.COMMON_ERROR, "put 
object failed: " + e.getMessage()); } catch (Exception ue) { - LOG.error("connect to s3 failed: ", ue); + LOG.warn("connect to s3 failed: ", ue); return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); } } @@ -214,7 +198,9 @@ public Status deleteObject(String remotePath) { getClient() .deleteObject( DeleteObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); - LOG.info("delete file " + remotePath + " success: " + response.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("delete file {} success: {}", remotePath, response); + } return Status.OK; } catch (S3Exception e) { LOG.warn("delete file failed: ", e); @@ -251,19 +237,23 @@ public Status deleteObjects(String absolutePath) { .build(); DeleteObjectsResponse resp = getClient().deleteObjects(req); - if (resp.errors().size() > 0) { + if (!resp.errors().isEmpty()) { LOG.warn("{} errors returned while deleting {} objects for dir {}", resp.errors().size(), objectList.size(), absolutePath); } - LOG.info("{} of {} objects deleted for dir {}", - resp.deleted().size(), objectList.size(), absolutePath); - totalObjects += objectList.size(); + if (LOG.isDebugEnabled()) { + LOG.debug("{} of {} objects deleted for dir {}", + resp.deleted().size(), objectList.size(), absolutePath); + } + totalObjects += objectList.size(); } isTruncated = objects.isTruncated(); continuationToken = objects.getContinuationToken(); } while (isTruncated); - LOG.info("total delete {} objects for dir {}", totalObjects, absolutePath); + if (LOG.isDebugEnabled()) { + LOG.debug("total delete {} objects for dir {}", totalObjects, absolutePath); + } return Status.OK; } catch (DdlException e) { LOG.warn("deleteObjects:", e); @@ -286,13 +276,15 @@ public Status copyObject(String origFilePath, String destFilePath) { .destinationBucket(descUri.getBucket()) .destinationKey(descUri.getKey()) .build()); - LOG.info("copy file from " + origFilePath + " to " + destFilePath + " success: " +
response.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("copy file from {} to {} success: {} ", origFilePath, destFilePath, response); + } return Status.OK; } catch (S3Exception e) { - LOG.error("copy file failed: ", e); + LOG.warn("copy file failed: ", e); return new Status(Status.ErrCode.COMMON_ERROR, "copy file failed: " + e.getMessage()); } catch (UserException ue) { - LOG.error("copy to s3 failed: ", ue); + LOG.warn("copy to s3 failed: ", ue); return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); } } @@ -392,7 +384,7 @@ public Status multipartUpload(String remotePath, @Nullable InputStream inputStre .build(); getClient().abortMultipartUpload(abortMultipartUploadRequest); } catch (Exception e1) { - LOG.warn("Failed to abort multipartUpload " + remotePath, e1); + LOG.warn("Failed to abort multipartUpload {}", remotePath, e1); } } } @@ -407,7 +399,7 @@ ListObjectsV2Response listObjectsV2(ListObjectsV2Request request) throws UserExc * List all files under the given path with glob pattern. * For example, if the path is "s3://bucket/path/to/*.csv", * it will list all files under "s3://bucket/path/to/" with ".csv" suffix. - * + *

* Copy from `AzureObjStorage.GlobList` */ public Status globList(String remotePath, List result, boolean fileNameOnly) { @@ -420,15 +412,17 @@ public Status globList(String remotePath, List result, boolean fileN String bucket = uri.getBucket(); String globPath = uri.getKey(); // eg: path/to/*.csv - LOG.info("globList globPath:{}, remotePath:{}", globPath, remotePath); - + if (LOG.isDebugEnabled()) { + LOG.debug("globList globPath:{}, remotePath:{}", globPath, remotePath); + } java.nio.file.Path pathPattern = Paths.get(globPath); PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + pathPattern); HashSet directorySet = new HashSet<>(); String listPrefix = S3Util.getLongestPrefix(globPath); // similar to Azure - LOG.info("globList listPrefix: {}", listPrefix); - + if (LOG.isDebugEnabled()) { + LOG.debug("globList listPrefix: {}", listPrefix); + } ListObjectsV2Request request = ListObjectsV2Request.builder() .bucket(bucket) .prefix(listPrefix) @@ -474,8 +468,8 @@ public Status globList(String remotePath, List result, boolean fileN isTruncated = response.isTruncated(); if (isTruncated) { request = request.toBuilder() - .continuationToken(response.nextContinuationToken()) - .build(); + .continuationToken(response.nextContinuationToken()) + .build(); } } while (isTruncated); @@ -489,9 +483,11 @@ public Status globList(String remotePath, List result, boolean fileN } finally { long endTime = System.nanoTime(); long duration = endTime - startTime; - LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} ms", - elementCnt, remotePath, roundCnt, matchCnt, - duration / 1000 / 1000); + if (LOG.isDebugEnabled()) { + LOG.debug("process {} elements under prefix {} for {} round, match {} elements, take {} ms", + elementCnt, remotePath, roundCnt, matchCnt, + duration / 1000 / 1000); + } } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java index 097c64a744b844..a30e2f09b367a4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java @@ -20,28 +20,19 @@ import org.apache.doris.analysis.StorageBackend.StorageType; import org.apache.doris.backup.Status; import org.apache.doris.common.UserException; +import org.apache.doris.datasource.property.storage.AzureProperties; import org.apache.doris.fs.obj.AzureObjStorage; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.fs.FileSystem; import java.util.List; -import java.util.Map; public class AzureFileSystem extends ObjFileSystem { - public AzureFileSystem(Map properties) { - super(StorageType.AZURE.name(), StorageType.S3, new AzureObjStorage(properties)); - initFsProperties(); - } - - @VisibleForTesting - public AzureFileSystem(AzureObjStorage storage) { - super(StorageType.AZURE.name(), StorageType.S3, storage); - initFsProperties(); - } - private void initFsProperties() { - this.properties.putAll(((AzureObjStorage) objStorage).getProperties()); + public AzureFileSystem(AzureProperties azureProperties) { + super(StorageType.AZURE.name(), StorageType.S3, new AzureObjStorage(azureProperties)); + this.storageProperties = azureProperties; + this.properties.putAll(storageProperties.getOrigProps()); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java index 5b9ee8aaeca1cd..b70e1d37de9a61 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java @@ -27,6 +27,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.util.BrokerUtil; import org.apache.doris.datasource.property.PropertyConverter; +import 
org.apache.doris.datasource.property.storage.BrokerProperties; import org.apache.doris.fs.operations.BrokerFileOperations; import org.apache.doris.fs.operations.OpParams; import org.apache.doris.service.FrontendOptions; @@ -81,6 +82,8 @@ public BrokerFileSystem(String name, Map properties) { properties.putAll(PropertyConverter.convertToHadoopFSProperties(properties)); this.properties = properties; this.operations = new BrokerFileOperations(name, properties); + // support broker properties in future + this.storageProperties = new BrokerProperties(properties); } public Pair getBroker() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java index 290ee37e383cc6..a9ea0f4e1b34a2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java @@ -21,7 +21,6 @@ import org.apache.doris.backup.Status; import org.apache.doris.common.UserException; import org.apache.doris.fs.PersistentFileSystem; -import org.apache.doris.fs.remote.dfs.DFSFileSystem; import com.google.common.collect.ImmutableSet; import org.apache.hadoop.fs.FileStatus; @@ -53,10 +52,6 @@ protected org.apache.hadoop.fs.FileSystem nativeFileSystem(String remotePath) th throw new UserException("Not support to getFileSystem."); } - public boolean ifNotSetFallbackToSimpleAuth() { - return properties.getOrDefault(DFSFileSystem.PROP_ALLOW_FALLBACK_TO_SIMPLE_AUTH, "").isEmpty(); - } - @Override public Status listFiles(String remotePath, boolean recursive, List result) { try { @@ -139,4 +134,8 @@ public void close() throws IOException { fsLock.unlock(); } } + + public boolean connectivityTest(List filePaths) throws UserException { + return true; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java index 
7a7a426c470172..da168b1a162f75 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java @@ -20,36 +20,36 @@ import org.apache.doris.analysis.StorageBackend; import org.apache.doris.backup.Status; import org.apache.doris.common.UserException; -import org.apache.doris.common.security.authentication.AuthenticationConfig; import org.apache.doris.common.security.authentication.HadoopAuthenticator; -import org.apache.doris.datasource.property.PropertyConverter; -import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.common.util.S3URI; +import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; import org.apache.doris.fs.obj.S3ObjStorage; -import org.apache.doris.fs.remote.dfs.DFSFileSystem; -import com.amazonaws.services.s3.model.AmazonS3Exception; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Strings; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import software.amazon.awssdk.services.s3.S3Client; -import java.io.FileNotFoundException; -import java.io.IOException; +import java.util.HashSet; import java.util.List; -import java.util.Map; +import java.util.Set; public class S3FileSystem extends ObjFileSystem { private static final Logger LOG = LogManager.getLogger(S3FileSystem.class); private HadoopAuthenticator authenticator = null; + private AbstractS3CompatibleProperties s3Properties; - public S3FileSystem(Map properties) { - super(StorageBackend.StorageType.S3.name(), StorageBackend.StorageType.S3, new S3ObjStorage(properties)); + + public S3FileSystem(AbstractS3CompatibleProperties s3Properties) { + + super(StorageBackend.StorageType.S3.name(), 
StorageBackend.StorageType.S3, + new S3ObjStorage(s3Properties)); + this.s3Properties = s3Properties; + this.storageProperties = s3Properties; initFsProperties(); + } @VisibleForTesting @@ -59,107 +59,44 @@ public S3FileSystem(S3ObjStorage storage) { } private void initFsProperties() { - this.properties.putAll(((S3ObjStorage) objStorage).getProperties()); + this.properties.putAll(storageProperties.getOrigProps()); } + @Override protected FileSystem nativeFileSystem(String remotePath) throws UserException { - //todo Extracting a common method to achieve logic reuse - if (closed.get()) { - throw new UserException("FileSystem is closed."); - } - if (dfsFileSystem == null) { - synchronized (this) { - if (closed.get()) { - throw new UserException("FileSystem is closed."); - } - if (dfsFileSystem == null) { - Configuration conf = DFSFileSystem.getHdfsConf(ifNotSetFallbackToSimpleAuth()); - System.setProperty("com.amazonaws.services.s3.enableV4", "true"); - // the entry value in properties may be null, and - PropertyConverter.convertToHadoopFSProperties(properties).entrySet().stream() - .filter(entry -> entry.getKey() != null && entry.getValue() != null) - .forEach(entry -> conf.set(entry.getKey(), entry.getValue())); - // S3 does not support Kerberos authentication, - // so here we create a simple authentication - AuthenticationConfig authConfig = AuthenticationConfig.getSimpleAuthenticationConfig(conf); - HadoopAuthenticator authenticator = HadoopAuthenticator.getHadoopAuthenticator(authConfig); - try { - dfsFileSystem = authenticator.doAs(() -> { - try { - return FileSystem.get(new Path(remotePath).toUri(), conf); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - this.authenticator = authenticator; - RemoteFSPhantomManager.registerPhantomReference(this); - } catch (Exception e) { - throw new UserException("Failed to get S3 FileSystem for " + e.getMessage(), e); - } - } - } - } - return dfsFileSystem; + throw new UserException("S3 does not support 
native file system"); } // broker file pattern glob is too complex, so we use hadoop directly - private Status globListImplV1(String remotePath, List result, boolean fileNameOnly) { - try { - FileSystem s3AFileSystem = nativeFileSystem(remotePath); - Path pathPattern = new Path(remotePath); - FileStatus[] files = s3AFileSystem.globStatus(pathPattern); - if (files == null) { - return Status.OK; - } - for (FileStatus fileStatus : files) { - RemoteFile remoteFile = new RemoteFile( - fileNameOnly ? fileStatus.getPath().getName() : fileStatus.getPath().toString(), - !fileStatus.isDirectory(), fileStatus.isDirectory() ? -1 : fileStatus.getLen(), - fileStatus.getBlockSize(), fileStatus.getModificationTime()); - result.add(remoteFile); - } - if (LOG.isDebugEnabled()) { - LOG.debug("remotePath:{}, result:{}", remotePath, result); - } - - } catch (FileNotFoundException e) { - LOG.info("file not found: " + e.getMessage()); - return new Status(Status.ErrCode.NOT_FOUND, "file not found: " + e.getMessage()); - } catch (Exception e) { - if (e.getCause() instanceof AmazonS3Exception) { - // process minio error msg - AmazonS3Exception ea = (AmazonS3Exception) e.getCause(); - Map callbackHeaders = ea.getHttpHeaders(); - if (callbackHeaders != null && !callbackHeaders.isEmpty()) { - String minioErrMsg = callbackHeaders.get("X-Minio-Error-Desc"); - if (minioErrMsg != null) { - return new Status(Status.ErrCode.COMMON_ERROR, "Minio request error: " + minioErrMsg); - } - } - } - LOG.error("errors while get file status ", e); - return new Status(Status.ErrCode.COMMON_ERROR, "errors while get file status " + e.getMessage()); - } - return Status.OK; - } - - private Status globListImplV2(String remotePath, List result, boolean fileNameOnly) { - return ((S3ObjStorage) objStorage).globList(remotePath, result, fileNameOnly); + @Override + public Status globList(String remotePath, List result, boolean fileNameOnly) { + S3ObjStorage objStorage = (S3ObjStorage) this.objStorage; + return 
objStorage.globList(remotePath, result, fileNameOnly); } @Override - public Status globList(String remotePath, List result, boolean fileNameOnly) { - if (!Strings.isNullOrEmpty(properties.get(S3Properties.ROLE_ARN)) - || !Strings.isNullOrEmpty(properties.get(S3Properties.Env.ROLE_ARN))) { - // https://issues.apache.org/jira/browse/HADOOP-19201 - // hadoop 3.3.6 we used now, not support aws assumed role with external id, so we - // write a globListImplV2 to support it - LOG.info("aws role arn mode, use globListImplV2"); - return globListImplV2(remotePath, result, fileNameOnly); + public boolean connectivityTest(List filePaths) throws UserException { + if (filePaths == null || filePaths.isEmpty()) { + throw new UserException("File paths cannot be null or empty for connectivity test."); } - - return globListImplV1(remotePath, result, fileNameOnly); + S3ObjStorage objStorage = (S3ObjStorage) this.objStorage; + try { + S3Client s3Client = objStorage.getClient(); + Set bucketNames = new HashSet<>(); + boolean usePathStyle = Boolean.parseBoolean(s3Properties.getUsePathStyle()); + boolean forceParsingByStandardUri = Boolean.parseBoolean(s3Properties.getForceParsingByStandardUrl()); + for (String filePath : filePaths) { + S3URI s3uri; + s3uri = S3URI.create(filePath, usePathStyle, forceParsingByStandardUri); + bucketNames.add(s3uri.getBucket()); + } + bucketNames.forEach(bucketName -> s3Client.headBucket(b -> b.bucket(bucketName))); + return true; + } catch (Exception e) { + LOG.warn("S3 connectivityTest error: {}", e.getMessage(), e); + } + return false; } @VisibleForTesting diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java index 7b57f4d76d797b..cdb55053a200b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java @@ -18,8 +18,10 @@ package 
org.apache.doris.fs.remote; import org.apache.doris.backup.Status; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.ExternalMetaCacheMgr; import org.apache.doris.fs.FileSystem; +import org.apache.doris.fs.FileSystemCache; import java.util.List; import java.util.Map; @@ -121,16 +123,10 @@ public Status listDirectories(String remotePath, Set result) { } public FileSystem fileSystem(String location) { - // todo: This method is currently unused. - // LocationPath has already been adapted to the new V2 logic. - // We’re keeping this code commented out for now, but it will be fully removed once - // V2 is finalized and fully adopted. - /* return extMetaCacheMgr.getFsCache().getRemoteFileSystem( + return extMetaCacheMgr.getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey( LocationPath.getFSIdentity(location, properties, - bindBrokerName), properties, bindBrokerName));*/ - // - return null; + bindBrokerName), properties, bindBrokerName)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java index 963dfbd56da4ad..5d9869b817e58c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java @@ -20,9 +20,9 @@ import org.apache.doris.analysis.StorageBackend; import org.apache.doris.backup.Status; import org.apache.doris.common.UserException; -import org.apache.doris.common.security.authentication.AuthenticationConfig; import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.doris.common.util.URI; +import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; import org.apache.doris.fs.operations.HDFSFileOperations; import org.apache.doris.fs.operations.HDFSOpParams; import org.apache.doris.fs.operations.OpParams; @@ -55,7 +55,6 @@ import 
java.nio.file.Paths; import java.util.Comparator; import java.util.List; -import java.util.Map; public class DFSFileSystem extends RemoteFileSystem { @@ -63,14 +62,19 @@ public class DFSFileSystem extends RemoteFileSystem { private static final Logger LOG = LogManager.getLogger(DFSFileSystem.class); private HDFSFileOperations operations = null; private HadoopAuthenticator authenticator = null; + private HdfsCompatibleProperties hdfsProperties; - public DFSFileSystem(Map properties) { - this(StorageBackend.StorageType.HDFS, properties); + public DFSFileSystem(HdfsCompatibleProperties hdfsProperties) { + super(StorageBackend.StorageType.HDFS.name(), StorageBackend.StorageType.HDFS); + this.properties.putAll(hdfsProperties.getOrigProps()); + this.storageProperties = hdfsProperties; + this.hdfsProperties = hdfsProperties; } - public DFSFileSystem(StorageBackend.StorageType type, Map properties) { - super(type.name(), type); - this.properties.putAll(properties); + public DFSFileSystem(HdfsCompatibleProperties hdfsProperties, StorageBackend.StorageType storageType) { + super(storageType.name(), storageType); + this.properties.putAll(hdfsProperties.getOrigProps()); + this.hdfsProperties = hdfsProperties; } @VisibleForTesting @@ -85,12 +89,11 @@ public FileSystem nativeFileSystem(String remotePath) throws UserException { throw new UserException("FileSystem is closed."); } if (dfsFileSystem == null) { - Configuration conf = getHdfsConf(ifNotSetFallbackToSimpleAuth()); - for (Map.Entry propEntry : properties.entrySet()) { - conf.set(propEntry.getKey(), propEntry.getValue()); - } - AuthenticationConfig authConfig = AuthenticationConfig.getKerberosConfig(conf); - authenticator = HadoopAuthenticator.getHadoopAuthenticator(authConfig); + Configuration conf = hdfsProperties.getHadoopConfiguration(); + // TODO: Temporarily disable the HDFS file system cache to prevent instances from being closed by + // each other in V1. This line can be removed once V1 and V2 are unified. 
+ conf.set("fs.hdfs.impl.disable.cache", "true"); + authenticator = HadoopAuthenticator.getHadoopAuthenticator(conf); try { dfsFileSystem = authenticator.doAs(() -> { try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/JFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/JFSFileSystem.java index ffabb211d0821a..200a7f3908e80a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/JFSFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/JFSFileSystem.java @@ -18,11 +18,10 @@ package org.apache.doris.fs.remote.dfs; import org.apache.doris.analysis.StorageBackend; - -import java.util.Map; +import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; public class JFSFileSystem extends DFSFileSystem { - public JFSFileSystem(Map properties) { - super(StorageBackend.StorageType.JFS, properties); + public JFSFileSystem(HdfsCompatibleProperties hdfsProperties) { + super(hdfsProperties, StorageBackend.StorageType.JFS); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/OFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/OFSFileSystem.java index dd69a300392a63..a061fdda730bbd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/OFSFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/OFSFileSystem.java @@ -18,11 +18,10 @@ package org.apache.doris.fs.remote.dfs; import org.apache.doris.analysis.StorageBackend; - -import java.util.Map; +import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; public class OFSFileSystem extends DFSFileSystem { - public OFSFileSystem(Map properties) { - super(StorageBackend.StorageType.OFS, properties); + public OFSFileSystem(HdfsCompatibleProperties properties) { + super(properties, StorageBackend.StorageType.OFS); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/OSSHdfsFileSystem.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/OSSHdfsFileSystem.java similarity index 96% rename from fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/OSSHdfsFileSystem.java rename to fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/OSSHdfsFileSystem.java index 50469050fcbd5c..636cb6ecb84c40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/OSSHdfsFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/OSSHdfsFileSystem.java @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -package org.apache.doris.fsv2.remote.dfs; +package org.apache.doris.fs.remote.dfs; import org.apache.doris.analysis.StorageBackend; import org.apache.doris.datasource.property.storage.OSSHdfsProperties; diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/DirectoryLister.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/DirectoryLister.java deleted file mode 100644 index 7440d15166e738..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/DirectoryLister.java +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-// This file is copied from -// https://github.com/trinodb/trino/blob/438/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/DirectoryLister.java -// and modified by Doris - -package org.apache.doris.fsv2; - -import org.apache.doris.catalog.TableIf; -import org.apache.doris.fsv2.remote.RemoteFile; - -public interface DirectoryLister { - RemoteIterator listFiles(FileSystem fs, boolean recursive, TableIf table, String location) - throws FileSystemIOException; -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystem.java deleted file mode 100644 index b1084680839016..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystem.java +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2; - -import org.apache.doris.backup.Status; -import org.apache.doris.fsv2.remote.RemoteFile; - -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * File system interface. - * All file operations should use DFSFileSystem. 
- * @see org.apache.doris.fsv2.remote.dfs.DFSFileSystem - * If the file system use the object storage's SDK, use ObjStorage - * @see org.apache.doris.fsv2.remote.ObjFileSystem - * Read and Write operation put in FileOperations - * @see org.apache.doris.fsv2.operations.FileOperations - */ -public interface FileSystem { - Map getProperties(); - - Status exists(String remotePath); - - default Status directoryExists(String dir) { - return exists(dir); - } - - Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize); - - Status upload(String localPath, String remotePath); - - Status directUpload(String content, String remoteFile); - - Status rename(String origFilePath, String destFilePath); - - default Status renameDir(String origFilePath, String destFilePath) { - return renameDir(origFilePath, destFilePath, () -> {}); - } - - default Status renameDir(String origFilePath, - String destFilePath, - Runnable runWhenPathNotExist) { - throw new UnsupportedOperationException("Unsupported operation rename dir on current file system."); - } - - Status delete(String remotePath); - - default Status deleteDirectory(String dir) { - return delete(dir); - } - - Status makeDir(String remotePath); - - Status listFiles(String remotePath, boolean recursive, List result); - - /** - * List files in remotePath by wildcard
- * The {@link RemoteFile}'name will only contain file name (Not full path) - * @param remotePath remote path - * @param result All eligible files under the path - * @return - */ - default Status globList(String remotePath, List result) { - return globList(remotePath, result, true); - } - - /** - * List files in remotePath by wildcard
- * @param remotePath remote path - * @param result All eligible files under the path - * @param fileNameOnly for {@link RemoteFile}'name: whether the full path is included.
- * true: only contains file name, false: contains full path
- * @return - */ - Status globList(String remotePath, List result, boolean fileNameOnly); - - default Status listDirectories(String remotePath, Set result) { - throw new UnsupportedOperationException("Unsupported operation list directories on current file system."); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemCache.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemCache.java deleted file mode 100644 index 578f76b8275479..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemCache.java +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2; - -import org.apache.doris.common.CacheFactory; -import org.apache.doris.common.Config; -import org.apache.doris.common.Pair; -import org.apache.doris.common.UserException; -import org.apache.doris.fsv2.remote.RemoteFileSystem; - -import com.github.benmanes.caffeine.cache.LoadingCache; -import org.apache.hadoop.conf.Configuration; - -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.OptionalLong; - -public class FileSystemCache { - - private final LoadingCache fileSystemCache; - - public FileSystemCache() { - // no need to set refreshAfterWrite, because the FileSystem is created once and never changed - CacheFactory fsCacheFactory = new CacheFactory( - OptionalLong.of(86400L), - OptionalLong.empty(), - Config.max_remote_file_system_cache_num, - false, - null); - fileSystemCache = fsCacheFactory.buildCache(this::loadFileSystem); - } - - private RemoteFileSystem loadFileSystem(FileSystemCacheKey key) throws UserException { - return FileSystemFactory.get(key.type, key.getFsProperties(), key.bindBrokerName); - } - - public RemoteFileSystem getRemoteFileSystem(FileSystemCacheKey key) { - return fileSystemCache.get(key); - } - - public static class FileSystemCacheKey { - private final FileSystemType type; - // eg: hdfs://nameservices1 - private final String fsIdent; - private final Map properties; - private final String bindBrokerName; - // only for creating new file system - private final Configuration conf; - - public FileSystemCacheKey(Pair fs, - Map properties, - String bindBrokerName, - Configuration conf) { - this.type = fs.first; - this.fsIdent = fs.second; - this.properties = properties; - this.bindBrokerName = bindBrokerName; - this.conf = conf; - } - - public FileSystemCacheKey(Pair fs, - Map properties, String bindBrokerName) { - this(fs, properties, bindBrokerName, null); - } - - public Map getFsProperties() { - if (conf == null) { - return properties; - } - Map result = new 
HashMap<>(); - conf.iterator().forEachRemaining(e -> result.put(e.getKey(), e.getValue())); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (!(obj instanceof FileSystemCacheKey)) { - return false; - } - FileSystemCacheKey o = (FileSystemCacheKey) obj; - boolean equalsWithoutBroker = type.equals(o.type) - && fsIdent.equals(o.fsIdent) - && properties.equals(o.properties); - if (bindBrokerName == null) { - return equalsWithoutBroker && o.bindBrokerName == null; - } - return equalsWithoutBroker && bindBrokerName.equals(o.bindBrokerName); - } - - @Override - public int hashCode() { - if (bindBrokerName == null) { - return Objects.hash(properties, fsIdent, type); - } - return Objects.hash(properties, fsIdent, type, bindBrokerName); - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemDirectoryLister.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemDirectoryLister.java deleted file mode 100644 index 6fe1b110783470..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemDirectoryLister.java +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2; - -import org.apache.doris.backup.Status; -import org.apache.doris.catalog.TableIf; -import org.apache.doris.fsv2.remote.RemoteFile; - -import java.util.ArrayList; -import java.util.List; - -public class FileSystemDirectoryLister implements DirectoryLister { - public RemoteIterator listFiles(FileSystem fs, boolean recursive, TableIf table, String location) - throws FileSystemIOException { - List result = new ArrayList<>(); - Status status = fs.listFiles(location, recursive, result); - if (!status.ok()) { - throw new FileSystemIOException(status.getErrCode(), status.getErrMsg()); - } - return new RemoteFileRemoteIterator(result); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemFactory.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemFactory.java deleted file mode 100644 index 162d440f2d05b5..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemFactory.java +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2; - -import org.apache.doris.analysis.BrokerDesc; -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.common.UserException; -import org.apache.doris.datasource.property.storage.StorageProperties; -import org.apache.doris.fsv2.remote.BrokerFileSystem; -import org.apache.doris.fsv2.remote.RemoteFileSystem; - -import java.util.List; -import java.util.Map; - -public class FileSystemFactory { - - public static RemoteFileSystem get(Map properties) throws UserException { - StorageProperties storageProperties = StorageProperties.createPrimary(properties); - return get(storageProperties); - } - - public static RemoteFileSystem get(StorageBackend.StorageType storageType, Map properties) - throws UserException { - if (storageType.equals(StorageBackend.StorageType.BROKER)) { - return new BrokerFileSystem("broker", properties); - } - StorageProperties storageProperties = StorageProperties.createPrimary(properties); - return get(storageProperties); - } - - public static RemoteFileSystem get(StorageProperties storageProperties) { - return StorageTypeMapper.create(storageProperties); - } - - // This method is a temporary workaround for handling properties. - // It will be removed when broker properties are officially supported. 
- public static RemoteFileSystem get(String name, Map properties) { - return new BrokerFileSystem(name, properties); - } - - public static RemoteFileSystem get(FileSystemType fileSystemType, Map properties, - String bindBrokerName) - throws UserException { - if (fileSystemType == FileSystemType.BROKER) { - return new BrokerFileSystem(bindBrokerName, properties); - } - List storagePropertiesList = StorageProperties.createAll(properties); - - for (StorageProperties storageProperties : storagePropertiesList) { - if (storageProperties.getStorageName().equalsIgnoreCase(fileSystemType.name())) { - return StorageTypeMapper.create(storageProperties); - } - } - throw new RuntimeException("Unsupported file system type: " + fileSystemType); - } - - public static RemoteFileSystem get(BrokerDesc brokerDesc) { - if (null != brokerDesc.getStorageProperties()) { - return get(brokerDesc.getStorageProperties()); - } - if (null != brokerDesc.getStorageType() - && brokerDesc.getStorageType().equals(StorageBackend.StorageType.BROKER)) { - return new BrokerFileSystem(brokerDesc.getName(), brokerDesc.getProperties()); - } - throw new RuntimeException("Unexpected storage type: " + brokerDesc.getStorageType()); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemIOException.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemIOException.java deleted file mode 100644 index 5e1e569b76a449..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemIOException.java +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2; - -import org.apache.doris.backup.Status.ErrCode; - -import java.io.IOException; -import java.util.Optional; -import javax.annotation.Nullable; - -public class FileSystemIOException extends IOException { - - @Nullable - private ErrCode errCode; - - public FileSystemIOException(ErrCode errCode, String message) { - super(message); - this.errCode = errCode; - } - - public FileSystemIOException(ErrCode errCode, String message, Throwable cause) { - super(message, cause); - this.errCode = errCode; - } - - public FileSystemIOException(String message) { - super(message); - this.errCode = null; - } - - public FileSystemIOException(String message, Throwable cause) { - super(message, cause); - this.errCode = null; - } - - public Optional getErrorCode() { - return Optional.ofNullable(errCode); - } - - @Override - public String getMessage() { - if (errCode != null) { - return String.format("[%s]: %s", - errCode, - super.getMessage()); - } else { - return super.getMessage(); - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProvider.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProvider.java deleted file mode 100644 index 4fc5ac316f0333..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProvider.java +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2; - -import org.apache.doris.datasource.SessionContext; - -public interface FileSystemProvider { - FileSystem get(SessionContext ctx); -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProviderImpl.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProviderImpl.java deleted file mode 100644 index f664012c6ac5a0..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemProviderImpl.java +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2; - -import org.apache.doris.datasource.ExternalMetaCacheMgr; -import org.apache.doris.datasource.SessionContext; -import org.apache.doris.fsv2.remote.SwitchingFileSystem; - -import java.util.Map; - -public class FileSystemProviderImpl implements FileSystemProvider { - private ExternalMetaCacheMgr extMetaCacheMgr; - private String bindBrokerName; - - private Map properties; - - public FileSystemProviderImpl(ExternalMetaCacheMgr extMetaCacheMgr, String bindBrokerName, - Map properties) { - this.extMetaCacheMgr = extMetaCacheMgr; - this.bindBrokerName = bindBrokerName; - this.properties = properties; - } - - @Override - public FileSystem get(SessionContext ctx) { - return new SwitchingFileSystem(extMetaCacheMgr, bindBrokerName, properties); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemType.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemType.java deleted file mode 100644 index f586da232c84f9..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemType.java +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2; - -// TODO: [FileSystemType Unification] -// There are currently multiple definitions of file system types across the codebase, including but not limited to: -// 1. Backend module (e.g., FileSystemBackendType) -// 2. Location/path parsing logic (e.g., LocationType or string-based tags) -// 3. This enum: FileSystemType (used in the SPI/plugin layer) -// -// Problem: -// - File system type definitions are scattered across different modules with inconsistent naming and granularity -// - Adding a new type requires changes in multiple places, increasing risk of bugs and maintenance overhead -// - Difficult to maintain and error-prone -// -// Refactoring Goal: -// - Consolidate file system type definitions into a single source of truth -// - Clearly define the semantics and usage of each type (e.g., remote vs local, object storage vs file system) -// - All modules should reference the unified definition to avoid duplication and hardcoded strings -// -// Suggested Approach: -// - Create a centralized `FsType` enum/class as the canonical definition -// - Provide mapping or adapter methods where needed (e.g., map LocationType to FsType) -// - Gradually deprecate other definitions and annotate them with @Deprecated, including migration instructions -// -public enum FileSystemType { - S3, - HDFS, - OFS, - JFS, - BROKER, - FILE, - AZURE -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemUtil.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemUtil.java deleted file mode 100644 index a3cbf6369fb213..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/FileSystemUtil.java +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2; - -import org.apache.doris.backup.Status; - -import org.apache.hadoop.fs.Path; - -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.Executor; -import java.util.concurrent.atomic.AtomicBoolean; - -public class FileSystemUtil { - - public static void asyncRenameFiles(FileSystem fs, - Executor executor, - List> renameFileFutures, - AtomicBoolean cancelled, - String origFilePath, - String destFilePath, - List fileNames) { - for (String fileName : fileNames) { - Path source = new Path(origFilePath, fileName); - Path target = new Path(destFilePath, fileName); - renameFileFutures.add(CompletableFuture.runAsync(() -> { - if (cancelled.get()) { - return; - } - Status status = fs.rename(source.toString(), target.toString()); - if (!status.ok()) { - throw new RuntimeException(status.getErrMsg()); - } - }, executor)); - } - } - - public static void asyncRenameDir(FileSystem fs, - Executor executor, - List> renameFileFutures, - AtomicBoolean cancelled, - String origFilePath, - String destFilePath, - Runnable runWhenPathNotExist) { - renameFileFutures.add(CompletableFuture.runAsync(() -> { - if (cancelled.get()) { - return; - } - Status status = fs.renameDir(origFilePath, destFilePath, runWhenPathNotExist); - if (!status.ok()) { - throw new RuntimeException(status.getErrMsg()); - } - }, executor)); - } -} diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fsv2/LocalDfsFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/LocalDfsFileSystem.java deleted file mode 100644 index 4b2a12e8597b49..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/LocalDfsFileSystem.java +++ /dev/null @@ -1,199 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2; - -import org.apache.doris.backup.Status; -import org.apache.doris.fsv2.remote.RemoteFile; - -import com.google.common.collect.ImmutableSet; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; - -public class LocalDfsFileSystem implements FileSystem { - - public LocalFileSystem fs = LocalFileSystem.getLocal(new Configuration()); - - public LocalDfsFileSystem() throws IOException { - } - - @Override - public Map getProperties() { - return null; - } - - @Override - public Status directoryExists(String dir) { - return exists(dir); - } - - @Override - public Status exists(String remotePath) { - boolean exists = false; - try { - exists = fs.exists(new Path(remotePath)); - } catch (IOException e) { - throw new RuntimeException(e); - } - if (exists) { - return Status.OK; - } else { - return new Status(Status.ErrCode.NOT_FOUND, ""); - } - } - - @Override - public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { - return null; - } - - @Override - public Status upload(String localPath, String remotePath) { - return null; - } - - @Override - public Status directUpload(String content, String remoteFile) { - return null; - } - - @Override - public Status rename(String origFilePath, String destFilePath) { - try { - fs.rename(new Path(origFilePath), new Path(destFilePath)); - } catch (IOException e) { - throw new RuntimeException(e); - } - return Status.OK; - } - - @Override - public Status renameDir(String origFilePath, String destFilePath, Runnable runWhenPathNotExist) { - Status status = 
exists(destFilePath); - if (status.ok()) { - throw new RuntimeException("Destination directory already exists: " + destFilePath); - } - String targetParent = new Path(destFilePath).getParent().toString(); - status = exists(targetParent); - if (Status.ErrCode.NOT_FOUND.equals(status.getErrCode())) { - status = makeDir(targetParent); - } - if (!status.ok()) { - throw new RuntimeException(status.getErrMsg()); - } - - runWhenPathNotExist.run(); - - return rename(origFilePath, destFilePath); - } - - @Override - public Status delete(String remotePath) { - try { - fs.delete(new Path(remotePath), true); - } catch (IOException e) { - throw new RuntimeException(e); - } - return Status.OK; - } - - @Override - public Status makeDir(String remotePath) { - try { - fs.mkdirs(new Path(remotePath)); - } catch (IOException e) { - throw new RuntimeException(e); - } - return Status.OK; - } - - @Override - public Status globList(String remotePath, List result, boolean fileNameOnly) { - try { - FileStatus[] locatedFileStatusRemoteIterator = fs.globStatus(new Path(remotePath)); - if (locatedFileStatusRemoteIterator == null) { - return Status.OK; - } - for (FileStatus fileStatus : locatedFileStatusRemoteIterator) { - RemoteFile remoteFile = new RemoteFile( - fileNameOnly ? fileStatus.getPath().getName() : fileStatus.getPath().toString(), - !fileStatus.isDirectory(), fileStatus.isDirectory() ? 
-1 : fileStatus.getLen(), - fileStatus.getBlockSize(), fileStatus.getModificationTime()); - result.add(remoteFile); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - return Status.OK; - } - - @Override - public Status listFiles(String remotePath, boolean recursive, List result) { - try { - Path locatedPath = new Path(remotePath); - RemoteIterator locatedFiles = fs.listFiles(locatedPath, recursive); - while (locatedFiles.hasNext()) { - LocatedFileStatus fileStatus = locatedFiles.next(); - RemoteFile location = new RemoteFile( - fileStatus.getPath(), fileStatus.isDirectory(), fileStatus.getLen(), - fileStatus.getBlockSize(), fileStatus.getModificationTime(), fileStatus.getBlockLocations()); - result.add(location); - } - } catch (FileNotFoundException e) { - return new Status(Status.ErrCode.NOT_FOUND, e.getMessage()); - } catch (Exception e) { - return new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); - } - return Status.OK; - } - - @Override - public Status listDirectories(String remotePath, Set result) { - try { - FileStatus[] fileStatuses = fs.listStatus(new Path(remotePath)); - result.addAll( - Arrays.stream(fileStatuses) - .filter(FileStatus::isDirectory) - .map(file -> file.getPath().toString() + "/") - .collect(ImmutableSet.toImmutableSet())); - } catch (IOException e) { - return new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); - } - return Status.OK; - } - - public void createFile(String path) throws IOException { - Path path1 = new Path(path); - if (!exists(path1.getParent().toString()).ok()) { - makeDir(path1.getParent().toString()); - } - FSDataOutputStream build = fs.createFile(path1).build(); - build.close(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/PersistentFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/PersistentFileSystem.java deleted file mode 100644 index a18a1615a24da1..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/PersistentFileSystem.java +++ 
/dev/null @@ -1,95 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.common.UserException; -import org.apache.doris.common.io.Text; -import org.apache.doris.datasource.property.storage.StorageProperties; -import org.apache.doris.persist.gson.GsonPreProcessable; - -import com.google.common.collect.Maps; -import com.google.gson.annotations.SerializedName; -import lombok.Getter; - -import java.io.DataInput; -import java.io.IOException; -import java.util.Map; - -/** - * Use for persistence, Repository will persist properties of file system. 
- */ -public abstract class PersistentFileSystem implements FileSystem, GsonPreProcessable { - public static final String STORAGE_TYPE = "_DORIS_STORAGE_TYPE_"; - @SerializedName("prop") - public Map properties = Maps.newHashMap(); - @SerializedName("n") - public String name; - public StorageBackend.StorageType type; - - @Getter - protected StorageProperties storageProperties; - - public PersistentFileSystem(String name, StorageBackend.StorageType type) { - this.name = name; - this.type = type; - } - - public String getName() { - return name; - } - - public Map getProperties() { - return properties; - } - - public StorageBackend.StorageType getStorageType() { - return type; - } - - /** - * - * @param in persisted data - * @return file systerm - */ - @Deprecated - public static PersistentFileSystem read(DataInput in) throws IOException { - Text.readString(in); - Map properties = Maps.newHashMap(); - int size = in.readInt(); - for (int i = 0; i < size; i++) { - String key = Text.readString(in); - String value = Text.readString(in); - properties.put(key, value); - } - if (properties.containsKey(STORAGE_TYPE)) { - properties.remove(STORAGE_TYPE); - } - try { - return FileSystemFactory.get(properties); - } catch (UserException e) { - // do we ignore this exception? - throw new IOException("Failed to create file system from properties: " + properties, e); - } - } - - @Override - public void gsonPreProcess() { - properties.put(STORAGE_TYPE, type.name()); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFileRemoteIterator.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFileRemoteIterator.java deleted file mode 100644 index 1ee41aaecd01bd..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFileRemoteIterator.java +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2; - -import org.apache.doris.fsv2.remote.RemoteFile; - -import java.util.List; -import java.util.NoSuchElementException; -import java.util.Objects; - -public class RemoteFileRemoteIterator - implements RemoteIterator { - private final List remoteFileList; - private int currentIndex = 0; - - public RemoteFileRemoteIterator(List remoteFileList) { - this.remoteFileList = Objects.requireNonNull(remoteFileList, "iterator is null"); - } - - @Override - public boolean hasNext() throws FileSystemIOException { - return currentIndex < remoteFileList.size(); - } - - @Override - public RemoteFile next() throws FileSystemIOException { - if (!hasNext()) { - throw new NoSuchElementException("No more elements in RemoteFileRemoteIterator"); - } - return remoteFileList.get(currentIndex++); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFiles.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFiles.java deleted file mode 100644 index 54a80af5891165..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteFiles.java +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2; - -import org.apache.doris.fs.remote.RemoteFile; - -import java.util.List; - -public class RemoteFiles { - - private final List files; - - public RemoteFiles(List files) { - this.files = files; - } - - public List files() { - return files; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteIterator.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteIterator.java deleted file mode 100644 index 9f93e3eb549173..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/RemoteIterator.java +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/trinodb/trino/blob/438/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/RemoteIterator.java -// and modified by Doris - -package org.apache.doris.fsv2; - -public interface RemoteIterator { - boolean hasNext() throws FileSystemIOException; - - T next() throws FileSystemIOException; -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/SimpleRemoteIterator.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/SimpleRemoteIterator.java deleted file mode 100644 index a631241fbcdeb7..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/SimpleRemoteIterator.java +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- - -package org.apache.doris.fsv2; - -import org.apache.doris.fsv2.remote.RemoteFile; - -import java.util.Iterator; -import java.util.Objects; -// This file is copied from -// https://github.com/trinodb/trino/blob/438/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/SimpleRemoteIterator.java -// and modified by Doris - -class SimpleRemoteIterator implements RemoteIterator { - private final Iterator iterator; - - public SimpleRemoteIterator(Iterator iterator) { - this.iterator = Objects.requireNonNull(iterator, "iterator is null"); - } - - @Override - public boolean hasNext() throws FileSystemIOException { - return iterator.hasNext(); - } - - @Override - public RemoteFile next() throws FileSystemIOException { - return iterator.next(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionDirectoryListingCacheKey.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionDirectoryListingCacheKey.java deleted file mode 100644 index ba4f80c3762b0b..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionDirectoryListingCacheKey.java +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-// This file is copied from -// https://github.com/trinodb/trino/blob/438/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/TransactionDirectoryListingCacheKey.java -// and modified by Doris - -package org.apache.doris.fsv2; - -import java.util.Objects; - -public class TransactionDirectoryListingCacheKey { - - private final long transactionId; - private final String path; - - public TransactionDirectoryListingCacheKey(long transactionId, String path) { - this.transactionId = transactionId; - this.path = Objects.requireNonNull(path, "path is null"); - } - - public String getPath() { - return path; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - TransactionDirectoryListingCacheKey that = (TransactionDirectoryListingCacheKey) o; - return transactionId == that.transactionId && path.equals(that.path); - } - - @Override - public int hashCode() { - return Objects.hash(transactionId, path); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("TransactionDirectoryListingCacheKey{"); - sb.append("transactionId=").append(transactionId); - sb.append(", path='").append(path).append('\''); - sb.append('}'); - return sb.toString(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryLister.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryLister.java deleted file mode 100644 index f7bdc5e3c05d09..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryLister.java +++ /dev/null @@ -1,219 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/trinodb/trino/blob/438/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/TransactionScopeCachingDirectoryLister.java -// and modified by Doris - -package org.apache.doris.fsv2; - -import org.apache.doris.catalog.TableIf; -import org.apache.doris.fsv2.remote.RemoteFile; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; -import com.google.common.cache.Cache; -import com.google.common.util.concurrent.UncheckedExecutionException; -import com.google.errorprone.annotations.concurrent.GuardedBy; -import org.apache.commons.collections.ListUtils; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.Objects; -import java.util.Optional; -import java.util.concurrent.ExecutionException; -import javax.annotation.Nullable; - -/** - * Caches directory content (including listings that were started concurrently). - * {@link TransactionScopeCachingDirectoryLister} assumes that all listings - * are performed by same user within single transaction, therefore any failure can - * be shared between concurrent listings. 
- */ -public class TransactionScopeCachingDirectoryLister implements DirectoryLister { - private final long transactionId; - - @VisibleForTesting - public Cache getCache() { - return cache; - } - - //TODO use a cache key based on Path & SchemaTableName and iterate over the cache keys - // to deal more efficiently with cache invalidation scenarios for partitioned tables. - private final Cache cache; - private final DirectoryLister delegate; - - public TransactionScopeCachingDirectoryLister(DirectoryLister delegate, long transactionId, - Cache cache) { - this.delegate = Objects.requireNonNull(delegate, "delegate is null"); - this.transactionId = transactionId; - this.cache = Objects.requireNonNull(cache, "cache is null"); - } - - @Override - public RemoteIterator listFiles(FileSystem fs, boolean recursive, TableIf table, String location) - throws FileSystemIOException { - return listInternal(fs, recursive, table, new TransactionDirectoryListingCacheKey(transactionId, location)); - } - - private RemoteIterator listInternal(FileSystem fs, boolean recursive, TableIf table, - TransactionDirectoryListingCacheKey cacheKey) - throws FileSystemIOException { - FetchingValueHolder cachedValueHolder; - try { - cachedValueHolder = cache.get(cacheKey, - () -> new FetchingValueHolder(createListingRemoteIterator(fs, recursive, table, cacheKey))); - } catch (ExecutionException | UncheckedExecutionException e) { - Throwable throwable = e.getCause(); - Throwables.throwIfInstanceOf(throwable, FileSystemIOException.class); - Throwables.throwIfUnchecked(throwable); - throw new RuntimeException("Failed to list directory: " + cacheKey.getPath(), throwable); - } - - if (cachedValueHolder.isFullyCached()) { - return new SimpleRemoteIterator(cachedValueHolder.getCachedFiles()); - } - - return cachingRemoteIterator(cachedValueHolder, cacheKey); - } - - private RemoteIterator createListingRemoteIterator(FileSystem fs, boolean recursive, - TableIf table, - TransactionDirectoryListingCacheKey 
cacheKey) - throws FileSystemIOException { - return delegate.listFiles(fs, recursive, table, cacheKey.getPath()); - } - - - private RemoteIterator cachingRemoteIterator(FetchingValueHolder cachedValueHolder, - TransactionDirectoryListingCacheKey cacheKey) { - return new RemoteIterator() { - private int fileIndex; - - @Override - public boolean hasNext() - throws FileSystemIOException { - try { - boolean hasNext = cachedValueHolder.getCachedFile(fileIndex).isPresent(); - // Update cache weight of cachedValueHolder for a given path. - // The cachedValueHolder acts as an invalidation guard. - // If a cache invalidation happens while this iterator goes over the files from the specified path, - // the eventually outdated file listing will not be added anymore to the cache. - cache.asMap().replace(cacheKey, cachedValueHolder, cachedValueHolder); - return hasNext; - } catch (Exception exception) { - // invalidate cached value to force retry of directory listing - cache.invalidate(cacheKey); - throw exception; - } - } - - @Override - public RemoteFile next() - throws FileSystemIOException { - // force cache entry weight update in case next file is cached - Preconditions.checkState(hasNext()); - return cachedValueHolder.getCachedFile(fileIndex++).orElseThrow(NoSuchElementException::new); - } - }; - } - - @VisibleForTesting - boolean isCached(String location) { - return isCached(new TransactionDirectoryListingCacheKey(transactionId, location)); - } - - @VisibleForTesting - boolean isCached(TransactionDirectoryListingCacheKey cacheKey) { - FetchingValueHolder cached = cache.getIfPresent(cacheKey); - return cached != null && cached.isFullyCached(); - } - - static class FetchingValueHolder { - - private final List cachedFiles = ListUtils.synchronizedList(new ArrayList()); - - @GuardedBy("this") - @Nullable - private RemoteIterator fileIterator; - @GuardedBy("this") - @Nullable - private Exception exception; - - public FetchingValueHolder(RemoteIterator fileIterator) { - 
this.fileIterator = Objects.requireNonNull(fileIterator, "fileIterator is null"); - } - - public synchronized boolean isFullyCached() { - return fileIterator == null && exception == null; - } - - public long getCacheFileCount() { - return cachedFiles.size(); - } - - public Iterator getCachedFiles() { - Preconditions.checkState(isFullyCached()); - return cachedFiles.iterator(); - } - - public Optional getCachedFile(int index) - throws FileSystemIOException { - int filesSize = cachedFiles.size(); - Preconditions.checkArgument(index >= 0 && index <= filesSize, - "File index (%s) out of bounds [0, %s]", index, filesSize); - - // avoid fileIterator synchronization (and thus blocking) for already cached files - if (index < filesSize) { - return Optional.of(cachedFiles.get(index)); - } - - return fetchNextCachedFile(index); - } - - private synchronized Optional fetchNextCachedFile(int index) - throws FileSystemIOException { - if (exception != null) { - throw new FileSystemIOException("Exception while listing directory", exception); - } - - if (index < cachedFiles.size()) { - // file was fetched concurrently - return Optional.of(cachedFiles.get(index)); - } - - try { - if (fileIterator == null || !fileIterator.hasNext()) { - // no more files - fileIterator = null; - return Optional.empty(); - } - - RemoteFile fileStatus = fileIterator.next(); - cachedFiles.add(fileStatus); - return Optional.of(fileStatus); - } catch (Exception exception) { - fileIterator = null; - this.exception = exception; - throw exception; - } - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryListerFactory.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryListerFactory.java deleted file mode 100644 index 24511201c0b542..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/TransactionScopeCachingDirectoryListerFactory.java +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation 
(ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/trinodb/trino/blob/438/plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/TransactionScopeCachingDirectoryListerFactory.java -// and modified by Doris - -package org.apache.doris.fsv2; - -import org.apache.doris.common.EvictableCacheBuilder; -import org.apache.doris.fsv2.TransactionScopeCachingDirectoryLister.FetchingValueHolder; - -import com.google.common.cache.Cache; - -import java.util.Optional; -import java.util.concurrent.atomic.AtomicLong; - -public class TransactionScopeCachingDirectoryListerFactory { - //TODO use a cache key based on Path & SchemaTableName and iterate over the cache keys - // to deal more efficiently with cache invalidation scenarios for partitioned tables. 
- // private final Optional> cache; - - private final Optional> cache; - - private final AtomicLong nextTransactionId = new AtomicLong(); - - public TransactionScopeCachingDirectoryListerFactory(long maxSize) { - if (maxSize > 0) { - EvictableCacheBuilder cacheBuilder = - EvictableCacheBuilder.newBuilder() - .maximumWeight(maxSize) - .weigher((key, value) -> - Math.toIntExact(value.getCacheFileCount())); - this.cache = Optional.of(cacheBuilder.build()); - } else { - cache = Optional.empty(); - } - } - - public DirectoryLister get(DirectoryLister delegate) { - return cache - .map(cache -> (DirectoryLister) new TransactionScopeCachingDirectoryLister(delegate, - nextTransactionId.getAndIncrement(), cache)) - .orElse(delegate); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/AzureObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/AzureObjStorage.java deleted file mode 100644 index 0ae0811e73cb73..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/AzureObjStorage.java +++ /dev/null @@ -1,381 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.obj; - -import org.apache.doris.backup.Status; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.UserException; -import org.apache.doris.common.util.S3URI; -import org.apache.doris.datasource.property.storage.AzureProperties; -import org.apache.doris.fsv2.remote.RemoteFile; - -import com.azure.core.http.rest.PagedIterable; -import com.azure.core.http.rest.PagedResponse; -import com.azure.core.http.rest.Response; -import com.azure.core.util.Context; -import com.azure.storage.blob.BlobClient; -import com.azure.storage.blob.BlobContainerClient; -import com.azure.storage.blob.BlobServiceClient; -import com.azure.storage.blob.BlobServiceClientBuilder; -import com.azure.storage.blob.batch.BlobBatch; -import com.azure.storage.blob.batch.BlobBatchClient; -import com.azure.storage.blob.batch.BlobBatchClientBuilder; -import com.azure.storage.blob.models.BlobErrorCode; -import com.azure.storage.blob.models.BlobItem; -import com.azure.storage.blob.models.BlobProperties; -import com.azure.storage.blob.models.BlobStorageException; -import com.azure.storage.blob.models.ListBlobsOptions; -import com.azure.storage.common.StorageSharedKeyCredential; -import org.apache.commons.lang3.tuple.Triple; -import org.apache.http.HttpStatus; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; - -import java.io.File; -import java.io.InputStream; -import java.nio.file.FileSystems; -import java.nio.file.PathMatcher; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; - -public class AzureObjStorage implements ObjStorage { - private static final Logger LOG = LogManager.getLogger(AzureObjStorage.class); - private static final String URI_TEMPLATE = "https://%s.blob.core.windows.net"; - - protected AzureProperties azureProperties; - private BlobServiceClient client; - private boolean isUsePathStyle; - - 
private boolean forceParsingByStandardUri; - - public AzureObjStorage(AzureProperties azureProperties) { - this.azureProperties = azureProperties; - this.isUsePathStyle = Boolean.parseBoolean(azureProperties.getUsePathStyle()); - this.forceParsingByStandardUri = Boolean.parseBoolean(azureProperties.getForceParsingByStandardUrl()); - } - - // To ensure compatibility with S3 usage, the path passed by the user still starts with 'S3://${containerName}'. - // For Azure, we need to remove this part. - private static String removeUselessSchema(String remotePath) { - String prefix = "s3://"; - - if (remotePath.startsWith(prefix)) { - remotePath = remotePath.substring(prefix.length()); - } - // Remove the useless container name - int firstSlashIndex = remotePath.indexOf('/'); - return remotePath.substring(firstSlashIndex + 1); - } - - - @Override - public BlobServiceClient getClient() throws UserException { - if (client == null) { - String uri = String.format(URI_TEMPLATE, azureProperties.getAccessKey()); - StorageSharedKeyCredential cred = new StorageSharedKeyCredential(azureProperties.getAccessKey(), - azureProperties.getSecretKey()); - BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); - builder.credential(cred); - builder.endpoint(uri); - client = builder.buildClient(); - } - return client; - } - - @Override - public Triple getStsToken() throws DdlException { - return null; - } - - @Override - public Status headObject(String remotePath) { - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - BlobClient blobClient = getClient().getBlobContainerClient(uri.getBucket()).getBlobClient(uri.getKey()); - LOG.info("headObject remotePath:{} bucket:{} key:{} properties:{}", - remotePath, uri.getBucket(), uri.getKey(), blobClient.getProperties()); - return Status.OK; - } catch (BlobStorageException e) { - if (e.getStatusCode() == HttpStatus.SC_NOT_FOUND) { - return new Status(Status.ErrCode.NOT_FOUND, "remote path does not 
exist: " + remotePath); - } else { - LOG.warn("headObject {} failed:", remotePath, e); - return new Status(Status.ErrCode.COMMON_ERROR, "headObject " - + remotePath + " failed: " + e.getMessage()); - } - } catch (UserException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "headObject " - + remotePath + " failed: " + e.getMessage()); - } - } - - @Override - public Status getObject(String remoteFilePath, File localFile) { - try { - S3URI uri = S3URI.create(remoteFilePath, isUsePathStyle, forceParsingByStandardUri); - BlobClient blobClient = getClient().getBlobContainerClient(uri.getBucket()).getBlobClient(uri.getKey()); - BlobProperties properties = blobClient.downloadToFile(localFile.getAbsolutePath()); - LOG.info("get file " + remoteFilePath + " success: " + properties.toString()); - return Status.OK; - } catch (BlobStorageException e) { - return new Status( - Status.ErrCode.COMMON_ERROR, - "get file from azure error: " + e.getServiceMessage()); - } catch (UserException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "getObject " - + remoteFilePath + " failed: " + e.getMessage()); - } - } - - @Override - public Status putObject(String remotePath, @Nullable InputStream content, long contentLength) { - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - BlobClient blobClient = getClient().getBlobContainerClient(uri.getBucket()).getBlobClient(uri.getKey()); - blobClient.upload(content, contentLength); - return Status.OK; - } catch (BlobStorageException e) { - return new Status( - Status.ErrCode.COMMON_ERROR, - "Error occurred while copying the blob:: " + e.getServiceMessage()); - } catch (UserException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "putObject " - + remotePath + " failed: " + e.getMessage()); - } - } - - @Override - public Status deleteObject(String remotePath) { - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - BlobClient blobClient = 
getClient().getBlobContainerClient(uri.getBucket()).getBlobClient(uri.getKey()); - blobClient.delete(); - LOG.info("delete file " + remotePath + " success"); - return Status.OK; - } catch (BlobStorageException e) { - if (e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) { - return Status.OK; - } - return new Status( - Status.ErrCode.COMMON_ERROR, - "get file from azure error: " + e.getServiceMessage()); - } catch (UserException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "deleteObject " - + remotePath + " failed: " + e.getMessage()); - } - } - - @Override - public Status deleteObjects(String remotePath) { - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - BlobContainerClient blobClient = getClient().getBlobContainerClient(uri.getBucket()); - String containerUrl = blobClient.getBlobContainerUrl(); - String continuationToken = ""; - boolean isTruncated = false; - long totalObjects = 0; - do { - RemoteObjects objects = listObjects(remotePath, continuationToken); - List objectList = objects.getObjectList(); - if (!objectList.isEmpty()) { - BlobBatchClient blobBatchClient = new BlobBatchClientBuilder( - getClient()).buildClient(); - BlobBatch blobBatch = blobBatchClient.getBlobBatch(); - - for (RemoteObject blob : objectList) { - blobBatch.deleteBlob(containerUrl, blob.getKey()); - } - Response resp = blobBatchClient.submitBatchWithResponse(blobBatch, true, null, Context.NONE); - LOG.info("{} objects deleted for dir {} return http code {}", - objectList.size(), remotePath, resp.getStatusCode()); - totalObjects += objectList.size(); - } - - isTruncated = objects.isTruncated(); - continuationToken = objects.getContinuationToken(); - } while (isTruncated); - LOG.info("total delete {} objects for dir {}", totalObjects, remotePath); - return Status.OK; - } catch (BlobStorageException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "list objects for delete objects failed: " + e.getMessage()); - } catch (Exception e) { 
- LOG.warn(String.format("delete objects %s failed", remotePath), e); - return new Status(Status.ErrCode.COMMON_ERROR, "delete objects failed: " + e.getMessage()); - } - } - - @Override - public Status copyObject(String origFilePath, String destFilePath) { - try { - S3URI origUri = S3URI.create(origFilePath, isUsePathStyle, forceParsingByStandardUri); - S3URI destUri = S3URI.create(destFilePath, isUsePathStyle, forceParsingByStandardUri); - BlobClient sourceBlobClient = getClient().getBlobContainerClient(origUri.getBucket()) - .getBlobClient(origUri.getKey()); - BlobClient destinationBlobClient = getClient().getBlobContainerClient(destUri.getBucket()) - .getBlobClient(destUri.getKey()); - destinationBlobClient.beginCopy(sourceBlobClient.getBlobUrl(), null); - LOG.info("Blob copied from " + origFilePath + " to " + destFilePath); - return Status.OK; - } catch (BlobStorageException e) { - return new Status( - Status.ErrCode.COMMON_ERROR, - "Error occurred while copying the blob:: " + e.getServiceMessage()); - } catch (UserException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "copyObject from " - + origFilePath + "to " + destFilePath + " failed: " + e.getMessage()); - } - } - - @Override - public RemoteObjects listObjects(String remotePath, String continuationToken) throws DdlException { - try { - ListBlobsOptions options = new ListBlobsOptions().setPrefix(remotePath); - //S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - PagedIterable pagedBlobs = getClient().getBlobContainerClient("selectdb-qa-datalake-test") - .listBlobs(options, continuationToken, null); - PagedResponse pagedResponse = pagedBlobs.iterableByPage().iterator().next(); - List remoteObjects = new ArrayList<>(); - - for (BlobItem blobItem : pagedResponse.getElements()) { - remoteObjects.add(new RemoteObject(blobItem.getName(), "", blobItem.getProperties().getETag(), - blobItem.getProperties().getContentLength())); - } - return new RemoteObjects(remoteObjects, 
pagedResponse.getContinuationToken() != null, - pagedResponse.getContinuationToken()); - } catch (BlobStorageException e) { - LOG.warn(String.format("Failed to list objects for S3: %s", remotePath), e); - throw new DdlException("Failed to list objects for S3, Error message: " + e.getMessage(), e); - } catch (UserException e) { - LOG.warn(String.format("Failed to list objects for S3: %s", remotePath), e); - throw new DdlException("Failed to list objects for S3, Error message: " + e.getMessage(), e); - } - } - - // Due to historical reasons, when the BE parses the object storage path. - // It assumes the path starts with 'S3://${containerName}' - // So here the path needs to be constructed in a format that BE can parse. - private String constructS3Path(String fileName, String bucket) throws UserException { - LOG.debug("the path is {}", String.format("s3://%s/%s", bucket, fileName)); - return String.format("s3://%s/%s", bucket, fileName); - } - - public static String getLongestPrefix(String globPattern) { - int length = globPattern.length(); - int earliestSpecialCharIndex = length; - - char[] specialChars = {'*', '?', '[', '{', '\\'}; - - for (char specialChar : specialChars) { - int index = globPattern.indexOf(specialChar); - if (index != -1 && index < earliestSpecialCharIndex) { - earliestSpecialCharIndex = index; - } - } - - return globPattern.substring(0, earliestSpecialCharIndex); - } - - public Status globList(String remotePath, List result, boolean fileNameOnly) { - long roundCnt = 0; - long elementCnt = 0; - long matchCnt = 0; - long startTime = System.nanoTime(); - Status st = Status.OK; - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - String globPath = uri.getKey(); - String bucket = uri.getBucket(); - LOG.info("try to glob list for azure, remote path {}, orig {}", globPath, remotePath); - BlobContainerClient client = getClient().getBlobContainerClient(bucket); - java.nio.file.Path pathPattern = 
Paths.get(globPath); - LOG.info("path pattern {}", pathPattern.toString()); - PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + pathPattern); - - HashSet directorySet = new HashSet<>(); - String listPrefix = getLongestPrefix(globPath); - LOG.info("azure glob list prefix is {}", listPrefix); - ListBlobsOptions options = new ListBlobsOptions().setPrefix(listPrefix); - String newContinuationToken = null; - do { - roundCnt++; - PagedResponse pagedResponse = getPagedBlobItems(client, options, newContinuationToken); - - for (BlobItem blobItem : pagedResponse.getElements()) { - elementCnt++; - java.nio.file.Path blobPath = Paths.get(blobItem.getName()); - - boolean isPrefix = false; - while (blobPath.normalize().toString().startsWith(listPrefix)) { - if (LOG.isDebugEnabled()) { - LOG.debug("get blob {}", blobPath.normalize().toString()); - } - if (!matcher.matches(blobPath)) { - isPrefix = true; - blobPath = blobPath.getParent(); - continue; - } - if (directorySet.contains(blobPath.normalize().toString())) { - break; - } - if (isPrefix) { - directorySet.add(blobPath.normalize().toString()); - } - - matchCnt++; - RemoteFile remoteFile = new RemoteFile( - fileNameOnly ? blobPath.getFileName().toString() : constructS3Path(blobPath.toString(), - uri.getBucket()), - !isPrefix, - isPrefix ? -1 : blobItem.getProperties().getContentLength(), - isPrefix ? -1 : blobItem.getProperties().getContentLength(), - isPrefix ? 
0 : blobItem.getProperties().getLastModified().getSecond()); - result.add(remoteFile); - - blobPath = blobPath.getParent(); - isPrefix = true; - } - } - newContinuationToken = pagedResponse.getContinuationToken(); - } while (newContinuationToken != null); - - } catch (BlobStorageException e) { - LOG.warn("glob file " + remotePath + " failed because azure error: " + e.getMessage()); - st = new Status(Status.ErrCode.COMMON_ERROR, "glob file " + remotePath - + " failed because azure error: " + e.getMessage()); - } catch (Exception e) { - LOG.warn("errors while glob file " + remotePath, e); - st = new Status(Status.ErrCode.COMMON_ERROR, "errors while glob file " + remotePath + e.getMessage()); - } finally { - long endTime = System.nanoTime(); - long duration = endTime - startTime; - LOG.info("process {} elements under prefix {} for {} round, match {} elements, take {} micro second", - remotePath, elementCnt, roundCnt, matchCnt, - duration / 1000); - } - return st; - } - - public PagedResponse getPagedBlobItems(BlobContainerClient client, ListBlobsOptions options, - String newContinuationToken) { - PagedIterable pagedBlobs = client.listBlobs(options, newContinuationToken, null); - return pagedBlobs.iterableByPage().iterator().next(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/ObjStorage.java deleted file mode 100644 index f45e4b1eebbe7e..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/ObjStorage.java +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.obj; - -import org.apache.doris.backup.Status; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.UserException; - -import org.apache.commons.lang3.tuple.Triple; -import org.jetbrains.annotations.Nullable; - -import java.io.File; -import java.io.InputStream; - -/** - * It is just used for reading remote object storage on cloud. - * @param cloud SDK Client - */ -public interface ObjStorage { - - // CHUNK_SIZE for multi part upload - int CHUNK_SIZE = 5 * 1024 * 1024; - - C getClient() throws UserException; - - Triple getStsToken() throws DdlException; - - Status headObject(String remotePath); - - Status getObject(String remoteFilePath, File localFile); - - Status putObject(String remotePath, @Nullable InputStream content, long contentLenghth); - - Status deleteObject(String remotePath); - - Status deleteObjects(String remotePath); - - Status copyObject(String origFilePath, String destFilePath); - - RemoteObjects listObjects(String remotePath, String continuationToken) throws DdlException; - - default String normalizePrefix(String prefix) { - return prefix.isEmpty() ? "" : (prefix.endsWith("/") ? 
prefix : String.format("%s/", prefix)); - } - - default String getRelativePath(String prefix, String key) throws DdlException { - String expectedPrefix = normalizePrefix(prefix); - if (!key.startsWith(expectedPrefix)) { - throw new DdlException( - "List a object whose key: " + key + " does not start with object prefix: " + expectedPrefix); - } - return key.substring(expectedPrefix.length()); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObject.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObject.java deleted file mode 100644 index e84413ca419c66..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObject.java +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.obj; - -public class RemoteObject { - private final String key; - private final String relativePath; - private final String etag; - private final long size; - - public RemoteObject(String key, String relativePath, String etag, long size) { - this.key = key; - this.relativePath = relativePath; - this.etag = etag; - this.size = size; - } - - public String getKey() { - return key; - } - - public String getRelativePath() { - return relativePath; - } - - public String getEtag() { - return etag; - } - - public long getSize() { - return size; - } - - @Override - public String toString() { - return "RemoteObject{" + "key='" + key + '\'' + ", relativePath='" + relativePath + '\'' + ", etag='" + etag - + '\'' + ", size=" + size + '}'; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObjects.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObjects.java deleted file mode 100644 index b3cc9fa9254816..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/RemoteObjects.java +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.obj; - -import java.util.List; - -public class RemoteObjects { - private final List objectList; - - private final boolean isTruncated; - - private final String continuationToken; - - public RemoteObjects(List objectList, boolean isTruncated, String continuationToken) { - this.objectList = objectList; - this.isTruncated = isTruncated; - this.continuationToken = continuationToken; - } - - public List getObjectList() { - return objectList; - } - - public boolean isTruncated() { - return isTruncated; - } - - public String getContinuationToken() { - return continuationToken; - } - - @Override - public String toString() { - return "RemoteObjects{" + "objectList=" + objectList + ", isTruncated=" + isTruncated - + ", continuationToken='" + continuationToken + '\'' + '}'; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/S3ObjStorage.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/S3ObjStorage.java deleted file mode 100644 index 761fad75734451..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/obj/S3ObjStorage.java +++ /dev/null @@ -1,493 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.obj; - -import org.apache.doris.backup.Status; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.UserException; -import org.apache.doris.common.util.S3URI; -import org.apache.doris.common.util.S3Util; -import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; -import org.apache.doris.fsv2.remote.RemoteFile; - -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.Triple; -import org.apache.http.HttpStatus; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.jetbrains.annotations.Nullable; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; -import software.amazon.awssdk.services.s3.model.CompletedPart; -import software.amazon.awssdk.services.s3.model.CopyObjectRequest; -import software.amazon.awssdk.services.s3.model.CopyObjectResponse; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; -import software.amazon.awssdk.services.s3.model.Delete; -import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; -import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; -import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; -import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.GetObjectResponse; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; -import 
software.amazon.awssdk.services.s3.model.ListObjectsV2Request; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; -import software.amazon.awssdk.services.s3.model.ObjectIdentifier; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.model.S3Exception; -import software.amazon.awssdk.services.s3.model.S3Object; -import software.amazon.awssdk.services.s3.model.UploadPartRequest; -import software.amazon.awssdk.services.s3.model.UploadPartResponse; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.InputStream; -import java.net.URI; -import java.nio.file.FileSystems; -import java.nio.file.PathMatcher; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.stream.Collectors; - -public class S3ObjStorage implements ObjStorage { - private static final Logger LOG = LogManager.getLogger(S3ObjStorage.class); - private S3Client client; - - protected Map properties; - - protected AbstractS3CompatibleProperties s3Properties; - - private boolean isUsePathStyle = false; - - private boolean forceParsingByStandardUri = false; - - public S3ObjStorage(AbstractS3CompatibleProperties properties) { - this.properties = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - setProperties(properties); - } - - public Map getProperties() { - return properties; - } - - protected void setProperties(AbstractS3CompatibleProperties properties) { - this.s3Properties = properties; - isUsePathStyle = Boolean.parseBoolean(properties.getUsePathStyle()); - forceParsingByStandardUri = Boolean.parseBoolean(s3Properties.getForceParsingByStandardUrl()); - } - - @Override - public S3Client getClient() throws UserException { - if (client == null) { - String endpointStr = s3Properties.getEndpoint(); 
- if (!endpointStr.contains("://")) { - endpointStr = "http://" + endpointStr; - } - URI endpoint = URI.create(endpointStr); - client = S3Util.buildS3Client(endpoint, s3Properties.getRegion(), - isUsePathStyle, s3Properties.getAwsCredentialsProvider()); - } - return client; - } - - - @Override - public Triple getStsToken() throws DdlException { - return null; - } - - @Override - public Status headObject(String remotePath) { - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - HeadObjectResponse response = getClient() - .headObject(HeadObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); - if (LOG.isDebugEnabled()) { - LOG.debug("headObject success: {}, response: {}", remotePath, response); - } - return Status.OK; - } catch (S3Exception e) { - if (e.statusCode() == HttpStatus.SC_NOT_FOUND) { - return new Status(Status.ErrCode.NOT_FOUND, "remote path does not exist: " + remotePath); - } else { - LOG.warn("headObject failed:", e); - return new Status(Status.ErrCode.COMMON_ERROR, "headObject failed: " + e.getMessage()); - } - } catch (UserException ue) { - LOG.warn("connect to s3 failed: ", ue); - return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); - } - } - - @Override - public Status getObject(String remoteFilePath, File localFile) { - try { - S3URI uri = S3URI.create(remoteFilePath, isUsePathStyle, forceParsingByStandardUri); - GetObjectResponse response = getClient().getObject( - GetObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), localFile.toPath()); - if (LOG.isDebugEnabled()) { - LOG.debug("get file {} success: {}", remoteFilePath, response); - } - return Status.OK; - } catch (S3Exception s3Exception) { - return new Status( - Status.ErrCode.COMMON_ERROR, - "get file from s3 error: " + s3Exception.awsErrorDetails().errorMessage()); - } catch (UserException ue) { - LOG.warn("connect to s3 failed: ", ue); - return new 
Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); - } catch (Exception e) { - return new Status(Status.ErrCode.COMMON_ERROR, e.toString()); - } - } - - @Override - public Status putObject(String remotePath, @Nullable InputStream content, long contentLength) { - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - RequestBody body = RequestBody.fromInputStream(content, contentLength); - PutObjectResponse response = - getClient() - .putObject( - PutObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build(), - body); - if (LOG.isDebugEnabled()) { - LOG.debug("put object success: {}", response); - } - return Status.OK; - } catch (S3Exception e) { - LOG.warn("put object failed: ", e); - return new Status(Status.ErrCode.COMMON_ERROR, "put object failed: " + e.getMessage()); - } catch (Exception ue) { - LOG.warn("connect to s3 failed: ", ue); - return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); - } - } - - @Override - public Status deleteObject(String remotePath) { - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - DeleteObjectResponse response = - getClient() - .deleteObject( - DeleteObjectRequest.builder().bucket(uri.getBucket()).key(uri.getKey()).build()); - if (LOG.isDebugEnabled()) { - LOG.debug("delete file {} success: {}", remotePath, response); - } - return Status.OK; - } catch (S3Exception e) { - LOG.warn("delete file failed: ", e); - if (e.statusCode() == HttpStatus.SC_NOT_FOUND) { - return Status.OK; - } - return new Status(Status.ErrCode.COMMON_ERROR, "delete file failed: " + e.getMessage()); - } catch (UserException ue) { - LOG.warn("connect to s3 failed: ", ue); - return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); - } - } - - @Override - public Status deleteObjects(String absolutePath) { - try { - S3URI baseUri = S3URI.create(absolutePath, 
isUsePathStyle, forceParsingByStandardUri); - String continuationToken = ""; - boolean isTruncated = false; - long totalObjects = 0; - do { - RemoteObjects objects = listObjects(absolutePath, continuationToken); - List objectList = objects.getObjectList(); - if (!objectList.isEmpty()) { - Delete delete = Delete.builder() - .objects(objectList.stream() - .map(RemoteObject::getKey) - .map(k -> ObjectIdentifier.builder().key(k).build()) - .collect(Collectors.toList())) - .build(); - DeleteObjectsRequest req = DeleteObjectsRequest.builder() - .bucket(baseUri.getBucket()) - .delete(delete) - .build(); - - DeleteObjectsResponse resp = getClient().deleteObjects(req); - if (!resp.errors().isEmpty()) { - LOG.warn("{} errors returned while deleting {} objects for dir {}", - resp.errors().size(), objectList.size(), absolutePath); - } - if (LOG.isDebugEnabled()) { - LOG.debug("{} of {} objects deleted for dir {}", - resp.deleted().size(), objectList.size(), absolutePath); - totalObjects += objectList.size(); - } - } - - isTruncated = objects.isTruncated(); - continuationToken = objects.getContinuationToken(); - } while (isTruncated); - if (LOG.isDebugEnabled()) { - LOG.debug("total delete {} objects for dir {}", totalObjects, absolutePath); - } - return Status.OK; - } catch (DdlException e) { - LOG.warn("deleteObjects:", e); - return new Status(Status.ErrCode.COMMON_ERROR, "list objects for delete objects failed: " + e.getMessage()); - } catch (Exception e) { - LOG.warn(String.format("delete objects %s failed", absolutePath), e); - return new Status(Status.ErrCode.COMMON_ERROR, "delete objects failed: " + e.getMessage()); - } - } - - @Override - public Status copyObject(String origFilePath, String destFilePath) { - try { - S3URI origUri = S3URI.create(origFilePath, isUsePathStyle, forceParsingByStandardUri); - S3URI descUri = S3URI.create(destFilePath, isUsePathStyle, forceParsingByStandardUri); - CopyObjectResponse response = getClient() - .copyObject( - 
CopyObjectRequest.builder() - .copySource(origUri.getBucket() + "/" + origUri.getKey()) - .destinationBucket(descUri.getBucket()) - .destinationKey(descUri.getKey()) - .build()); - if (LOG.isDebugEnabled()) { - LOG.debug("copy file from {} to {} success: {} ", origFilePath, destFilePath, response); - } - return Status.OK; - } catch (S3Exception e) { - LOG.warn("copy file failed: ", e); - return new Status(Status.ErrCode.COMMON_ERROR, "copy file failed: " + e.getMessage()); - } catch (UserException ue) { - LOG.warn("copy to s3 failed: ", ue); - return new Status(Status.ErrCode.COMMON_ERROR, "connect to s3 failed: " + ue.getMessage()); - } - } - - @Override - public RemoteObjects listObjects(String absolutePath, String continuationToken) throws DdlException { - try { - S3URI uri = S3URI.create(absolutePath, isUsePathStyle, forceParsingByStandardUri); - String bucket = uri.getBucket(); - String prefix = uri.getKey(); - ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder() - .bucket(bucket) - .prefix(normalizePrefix(prefix)); - if (!StringUtils.isEmpty(continuationToken)) { - requestBuilder.continuationToken(continuationToken); - } - ListObjectsV2Response response = getClient().listObjectsV2(requestBuilder.build()); - List remoteObjects = new ArrayList<>(); - for (S3Object c : response.contents()) { - String relativePath = getRelativePath(prefix, c.key()); - remoteObjects.add(new RemoteObject(c.key(), relativePath, c.eTag(), c.size())); - } - return new RemoteObjects(remoteObjects, response.isTruncated(), response.nextContinuationToken()); - } catch (Exception e) { - LOG.warn(String.format("Failed to list objects for S3: %s", absolutePath), e); - throw new DdlException("Failed to list objects for S3, Error message: " + e.getMessage(), e); - } - } - - public Status multipartUpload(String remotePath, @Nullable InputStream inputStream, long totalBytes) { - Status st = Status.OK; - long uploadedBytes = 0; - int bytesRead = 0; - byte[] buffer = new 
byte[CHUNK_SIZE]; - int partNumber = 1; - - String uploadId = null; - S3URI uri = null; - Map etags = new HashMap<>(); - - try { - uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - CreateMultipartUploadRequest createMultipartUploadRequest = CreateMultipartUploadRequest.builder() - .bucket(uri.getBucket()) - .key(uri.getKey()) - .build(); - CreateMultipartUploadResponse createMultipartUploadResponse = getClient() - .createMultipartUpload(createMultipartUploadRequest); - - uploadId = createMultipartUploadResponse.uploadId(); - - while (uploadedBytes < totalBytes && (bytesRead = inputStream.read(buffer)) != -1) { - uploadedBytes += bytesRead; - UploadPartRequest uploadPartRequest = UploadPartRequest.builder() - .bucket(uri.getBucket()) - .key(uri.getKey()) - .uploadId(uploadId) - .partNumber(partNumber).build(); - RequestBody body = RequestBody - .fromInputStream(new ByteArrayInputStream(buffer, 0, bytesRead), bytesRead); - UploadPartResponse uploadPartResponse = getClient().uploadPart(uploadPartRequest, body); - - etags.put(partNumber, uploadPartResponse.eTag()); - partNumber++; - uploadedBytes += bytesRead; - } - - List completedParts = etags.entrySet().stream() - .map(entry -> CompletedPart.builder() - .partNumber(entry.getKey()) - .eTag(entry.getValue()) - .build()) - .collect(Collectors.toList()); - CompletedMultipartUpload completedMultipartUpload = CompletedMultipartUpload.builder() - .parts(completedParts) - .build(); - - CompleteMultipartUploadRequest completeMultipartUploadRequest = CompleteMultipartUploadRequest.builder() - .bucket(uri.getBucket()) - .key(uri.getKey()) - .uploadId(uploadId) - .multipartUpload(completedMultipartUpload) - .build(); - - getClient().completeMultipartUpload(completeMultipartUploadRequest); - } catch (Exception e) { - LOG.warn("remotePath:{}, ", remotePath, e); - st = new Status(Status.ErrCode.COMMON_ERROR, "Failed to multipartUpload " + remotePath - + " reason: " + e.getMessage()); - - if (uri != null 
&& uploadId != null) { - try { - AbortMultipartUploadRequest abortMultipartUploadRequest = AbortMultipartUploadRequest.builder() - .bucket(uri.getBucket()) - .key(uri.getKey()) - .uploadId(uploadId) - .build(); - getClient().abortMultipartUpload(abortMultipartUploadRequest); - } catch (Exception e1) { - LOG.warn("Failed to abort multipartUpload {}", remotePath, e1); - } - } - } - return st; - } - - ListObjectsV2Response listObjectsV2(ListObjectsV2Request request) throws UserException { - return getClient().listObjectsV2(request); - } - - /** - * List all files under the given path with glob pattern. - * For example, if the path is "s3://bucket/path/to/*.csv", - * it will list all files under "s3://bucket/path/to/" with ".csv" suffix. - *

- * Copy from `AzureObjStorage.GlobList` - */ - public Status globList(String remotePath, List result, boolean fileNameOnly) { - long roundCnt = 0; - long elementCnt = 0; - long matchCnt = 0; - long startTime = System.nanoTime(); - try { - S3URI uri = S3URI.create(remotePath, isUsePathStyle, forceParsingByStandardUri); - String bucket = uri.getBucket(); - String globPath = uri.getKey(); // eg: path/to/*.csv - - if (LOG.isDebugEnabled()) { - LOG.debug("globList globPath:{}, remotePath:{}", globPath, remotePath); - } - java.nio.file.Path pathPattern = Paths.get(globPath); - PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + pathPattern); - HashSet directorySet = new HashSet<>(); - - String listPrefix = S3Util.getLongestPrefix(globPath); // similar to Azure - if (LOG.isDebugEnabled()) { - LOG.debug("globList listPrefix: {}", listPrefix); - } - ListObjectsV2Request request = ListObjectsV2Request.builder() - .bucket(bucket) - .prefix(listPrefix) - .build(); - - boolean isTruncated = false; - do { - roundCnt++; - ListObjectsV2Response response = listObjectsV2(request); - for (S3Object obj : response.contents()) { - elementCnt++; - java.nio.file.Path objPath = Paths.get(obj.key()); - - boolean isPrefix = false; - while (objPath != null && objPath.normalize().toString().startsWith(listPrefix)) { - if (!matcher.matches(objPath)) { - isPrefix = true; - objPath = objPath.getParent(); - continue; - } - if (directorySet.contains(objPath.normalize().toString())) { - break; - } - if (isPrefix) { - directorySet.add(objPath.normalize().toString()); - } - - matchCnt++; - RemoteFile remoteFile = new RemoteFile( - fileNameOnly ? objPath.getFileName().toString() : - "s3://" + bucket + "/" + objPath.toString(), - !isPrefix, - isPrefix ? -1 : obj.size(), - isPrefix ? -1 : obj.size(), - isPrefix ? 
0 : obj.lastModified().toEpochMilli() - ); - result.add(remoteFile); - objPath = objPath.getParent(); - isPrefix = true; - } - } - - isTruncated = response.isTruncated(); - if (isTruncated) { - request = request.toBuilder() - .continuationToken(response.nextContinuationToken()) - .build(); - } - } while (isTruncated); - - if (LOG.isDebugEnabled()) { - LOG.debug("remotePath:{}, result:{}", remotePath, result); - } - return Status.OK; - } catch (Exception e) { - LOG.warn("Errors while getting file status", e); - return new Status(Status.ErrCode.COMMON_ERROR, "Errors while getting file status " + e.getMessage()); - } finally { - long endTime = System.nanoTime(); - long duration = endTime - startTime; - if (LOG.isDebugEnabled()) { - LOG.debug("process {} elements under prefix {} for {} round, match {} elements, take {} ms", - elementCnt, remotePath, roundCnt, matchCnt, - duration / 1000 / 1000); - } - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/AzureFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/AzureFileSystem.java deleted file mode 100644 index 3db9af44a8446b..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/AzureFileSystem.java +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.remote; - -import org.apache.doris.analysis.StorageBackend.StorageType; -import org.apache.doris.backup.Status; -import org.apache.doris.common.UserException; -import org.apache.doris.datasource.property.storage.AzureProperties; -import org.apache.doris.fsv2.obj.AzureObjStorage; - -import org.apache.hadoop.fs.FileSystem; - -import java.util.List; - -public class AzureFileSystem extends ObjFileSystem { - - public AzureFileSystem(AzureProperties azureProperties) { - super(StorageType.AZURE.name(), StorageType.S3, new AzureObjStorage(azureProperties)); - this.storageProperties = azureProperties; - this.properties.putAll(storageProperties.getOrigProps()); - } - - @Override - protected FileSystem nativeFileSystem(String remotePath) throws UserException { - return null; - } - - @Override - public Status globList(String remotePath, List result, boolean fileNameOnly) { - AzureObjStorage azureObjStorage = (AzureObjStorage) getObjStorage(); - return azureObjStorage.globList(remotePath, result, fileNameOnly); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/BrokerFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/BrokerFileSystem.java deleted file mode 100644 index 88e0f90fc1911d..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/BrokerFileSystem.java +++ /dev/null @@ -1,704 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.remote; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.backup.Status; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.FsBroker; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.ClientPool; -import org.apache.doris.common.Pair; -import org.apache.doris.common.UserException; -import org.apache.doris.common.util.BrokerUtil; -import org.apache.doris.datasource.property.PropertyConverter; -import org.apache.doris.datasource.property.storage.BrokerProperties; -import org.apache.doris.fs.operations.BrokerFileOperations; -import org.apache.doris.fs.operations.OpParams; -import org.apache.doris.service.FrontendOptions; -import org.apache.doris.thrift.TBrokerCheckPathExistRequest; -import org.apache.doris.thrift.TBrokerCheckPathExistResponse; -import org.apache.doris.thrift.TBrokerDeletePathRequest; -import org.apache.doris.thrift.TBrokerFD; -import org.apache.doris.thrift.TBrokerFileStatus; -import org.apache.doris.thrift.TBrokerIsSplittableRequest; -import org.apache.doris.thrift.TBrokerIsSplittableResponse; -import org.apache.doris.thrift.TBrokerListPathRequest; -import org.apache.doris.thrift.TBrokerListResponse; -import org.apache.doris.thrift.TBrokerOperationStatus; -import org.apache.doris.thrift.TBrokerOperationStatusCode; -import org.apache.doris.thrift.TBrokerPReadRequest; -import org.apache.doris.thrift.TBrokerPWriteRequest; -import org.apache.doris.thrift.TBrokerReadResponse; -import 
org.apache.doris.thrift.TBrokerRenamePathRequest; -import org.apache.doris.thrift.TBrokerVersion; -import org.apache.doris.thrift.TNetworkAddress; -import org.apache.doris.thrift.TPaloBrokerService; - -import com.google.common.base.Preconditions; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.thrift.TException; -import org.apache.thrift.transport.TTransportException; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.nio.file.FileVisitOption; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Comparator; -import java.util.List; -import java.util.Map; - -public class BrokerFileSystem extends RemoteFileSystem { - private static final Logger LOG = LogManager.getLogger(BrokerFileSystem.class); - private final BrokerFileOperations operations; - - public BrokerFileSystem(String name, Map properties) { - super(name, StorageBackend.StorageType.BROKER); - properties.putAll(PropertyConverter.convertToHadoopFSProperties(properties)); - this.properties = properties; - this.operations = new BrokerFileOperations(name, properties); - // support broker properties in future - this.storageProperties = new BrokerProperties(properties); - } - - public Pair getBroker() { - Pair result = Pair.of(null, null); - FsBroker broker; - try { - String localIP = FrontendOptions.getLocalHostAddress(); - broker = Env.getCurrentEnv().getBrokerMgr().getBroker(name, localIP); - } catch (AnalysisException e) { - LOG.warn("failed to get a broker address: " + e.getMessage()); - return null; - } - TNetworkAddress address = new TNetworkAddress(broker.host, broker.port); - TPaloBrokerService.Client client; - try { - client = 
ClientPool.brokerPool.borrowObject(address); - } catch (Exception e) { - LOG.warn("failed to get broker client: " + e.getMessage()); - return null; - } - - result.first = client; - result.second = address; - LOG.info("get broker: {}", BrokerUtil.printBroker(name, address)); - return result; - } - - @Override - public Status exists(String remotePath) { - // 1. get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // check path - boolean needReturn = true; - try { - TBrokerCheckPathExistRequest req = new TBrokerCheckPathExistRequest(TBrokerVersion.VERSION_ONE, - remotePath, properties); - TBrokerCheckPathExistResponse rep = client.checkPathExist(req); - TBrokerOperationStatus opst = rep.getOpStatus(); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to check remote path exist: " + remotePath - + ", broker: " + BrokerUtil.printBroker(name, address) - + ". msg: " + opst.getMessage()); - } - - if (!rep.isIsPathExist()) { - return new Status(Status.ErrCode.NOT_FOUND, "remote path does not exist: " + remotePath); - } - - return Status.OK; - } catch (TException e) { - needReturn = false; - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to check remote path exist: " + remotePath - + ", broker: " + BrokerUtil.printBroker(name, address) - + ". 
msg: " + e.getMessage()); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - } - - @Override - public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { - if (LOG.isDebugEnabled()) { - LOG.debug("download from {} to {}, file size: {}.", remoteFilePath, localFilePath, fileSize); - } - - long start = System.currentTimeMillis(); - - // 1. get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // 2. open file reader with broker - TBrokerFD fd = new TBrokerFD(); - Status opStatus = operations.openReader(OpParams.of(client, address, remoteFilePath, fd)); - if (!opStatus.ok()) { - return opStatus; - } - LOG.info("finished to open reader. fd: {}. download {} to {}.", - fd, remoteFilePath, localFilePath); - Preconditions.checkNotNull(fd); - // 3. delete local file if exist - File localFile = new File(localFilePath); - if (localFile.exists()) { - try { - Files.walk(Paths.get(localFilePath), FileVisitOption.FOLLOW_LINKS) - .sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); - } catch (IOException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to delete exist local file: " + localFilePath); - } - } - - // 4. create local file - Status status = Status.OK; - try { - if (!localFile.createNewFile()) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to create local file: " + localFilePath); - } - } catch (IOException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to create local file: " - + localFilePath + ", msg: " + e.getMessage()); - } - - // 5. 
read remote file with broker and write to local - String lastErrMsg = null; - try (BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(localFile))) { - final long bufSize = 1024 * 1024; // 1MB - long leftSize = fileSize; - long readOffset = 0; - while (leftSize > 0) { - long readLen = Math.min(leftSize, bufSize); - TBrokerReadResponse rep = null; - // We only retry if we encounter a timeout thrift exception. - int tryTimes = 0; - while (tryTimes < 3) { - try { - TBrokerPReadRequest req = new TBrokerPReadRequest(TBrokerVersion.VERSION_ONE, - fd, readOffset, readLen); - rep = client.pread(req); - if (rep.getOpStatus().getStatusCode() != TBrokerOperationStatusCode.OK) { - // pread return failure. - lastErrMsg = String.format("failed to read via broker %s. " - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s, err code: %d, msg: %s", - BrokerUtil.printBroker(name, address), - readOffset, readLen, fileSize, - remoteFilePath, rep.getOpStatus().getStatusCode().getValue(), - rep.getOpStatus().getMessage()); - LOG.warn(lastErrMsg); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - } - if (rep.opStatus.statusCode != TBrokerOperationStatusCode.END_OF_FILE) { - if (LOG.isDebugEnabled()) { - LOG.debug("download. readLen: {}, read data len: {}, left size:{}. total size: {}", - readLen, rep.getData().length, leftSize, fileSize); - } - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("read eof: " + remoteFilePath); - } - } - break; - } catch (TTransportException e) { - if (e.getType() == TTransportException.TIMED_OUT) { - // we only retry when we encounter timeout exception. - lastErrMsg = String.format("failed to read via broker %s. " - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s, timeout.", - BrokerUtil.printBroker(name, address), - readOffset, readLen, fileSize, - remoteFilePath); - tryTimes++; - continue; - } - - lastErrMsg = String.format("failed to read via broker %s. 
" - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s. msg: %s", - BrokerUtil.printBroker(name, address), - readOffset, readLen, fileSize, - remoteFilePath, e.getMessage()); - LOG.warn(lastErrMsg); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } catch (TException e) { - lastErrMsg = String.format("failed to read via broker %s. " - + "current read offset: %d, read length: %d," - + " file size: %d, file: %s. msg: %s", - BrokerUtil.printBroker(name, address), - readOffset, readLen, fileSize, - remoteFilePath, e.getMessage()); - LOG.warn(lastErrMsg); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } // end of retry loop - - if (status.ok() && tryTimes < 3) { - // read succeed, write to local file - Preconditions.checkNotNull(rep); - // NOTICE(cmy): Sometimes the actual read length does not equal to the expected read length, - // even if the broker's read buffer size is large enough. - // I don't know why, but have to adapt to it. - if (rep.getData().length != readLen) { - LOG.warn("the actual read length does not equal to " - + "the expected read length: {} vs. {}, file: {}, broker: {}", - rep.getData().length, readLen, remoteFilePath, - BrokerUtil.printBroker(name, address)); - } - - out.write(rep.getData()); - readOffset += rep.getData().length; - leftSize -= rep.getData().length; - } else { - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } // end of reading remote file - } catch (IOException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "Got exception: " + e.getMessage() + ", broker: " - + BrokerUtil.printBroker(name, address)); - } finally { - // close broker reader - Status closeStatus = operations.closeReader(OpParams.of(client, address, fd)); - if (!closeStatus.ok()) { - LOG.warn(closeStatus.getErrMsg()); - if (status.ok()) { - // we return close write error only if no other error has been encountered. 
- status = closeStatus; - } - ClientPool.brokerPool.invalidateObject(address, client); - } else { - ClientPool.brokerPool.returnObject(address, client); - } - } - - LOG.info("finished to download from {} to {} with size: {}. cost {} ms", - remoteFilePath, localFilePath, fileSize, (System.currentTimeMillis() - start)); - return status; - } - - // directly upload the content to remote file - @Override - public Status directUpload(String content, String remoteFile) { - // 1. get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - TBrokerFD fd = new TBrokerFD(); - Status status = Status.OK; - try { - // 2. open file writer with broker - status = operations.openWriter(OpParams.of(client, address, remoteFile, fd)); - if (!status.ok()) { - return status; - } - - // 3. write content - try { - ByteBuffer bb = ByteBuffer.wrap(content.getBytes(StandardCharsets.UTF_8)); - TBrokerPWriteRequest req = new TBrokerPWriteRequest(TBrokerVersion.VERSION_ONE, fd, 0, bb); - TBrokerOperationStatus opst = client.pwrite(req); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - // pwrite return failure. 
- status = new Status(Status.ErrCode.COMMON_ERROR, "write failed: " + opst.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } - } catch (TException e) { - status = new Status(Status.ErrCode.BAD_CONNECTION, "write exception: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } - } finally { - Status closeStatus = operations.closeWriter(OpParams.of(client, address, fd)); - if (closeStatus.getErrCode() == Status.ErrCode.BAD_CONNECTION - || status.getErrCode() == Status.ErrCode.BAD_CONNECTION) { - ClientPool.brokerPool.invalidateObject(address, client); - } else { - ClientPool.brokerPool.returnObject(address, client); - } - } - - return status; - } - - @Override - public Status upload(String localPath, String remotePath) { - long start = System.currentTimeMillis(); - // 1. get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // 2. open file write with broker - TBrokerFD fd = new TBrokerFD(); - Status status = operations.openWriter(OpParams.of(client, address, remotePath, fd)); - if (!status.ok()) { - return status; - } - - // 3. read local file and write to remote with broker - File localFile = new File(localPath); - long fileLength = localFile.length(); - byte[] readBuf = new byte[1024]; - try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(localFile))) { - // save the last err msg - String lastErrMsg = null; - // save the current write offset of remote file - long writeOffset = 0; - // read local file, 1MB at a time - int bytesRead; - while ((bytesRead = in.read(readBuf)) != -1) { - ByteBuffer bb = ByteBuffer.wrap(readBuf, 0, bytesRead); - - // We only retry if we encounter a timeout thrift exception. 
- int tryTimes = 0; - while (tryTimes < 3) { - try { - TBrokerPWriteRequest req - = new TBrokerPWriteRequest(TBrokerVersion.VERSION_ONE, fd, writeOffset, bb); - TBrokerOperationStatus opst = client.pwrite(req); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - // pwrite return failure. - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s, err code: %d, msg: %s", - BrokerUtil.printBroker(name, address), - writeOffset, bytesRead, fileLength, - remotePath, opst.getStatusCode().getValue(), opst.getMessage()); - LOG.warn(lastErrMsg); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - } - break; - } catch (TTransportException e) { - if (e.getType() == TTransportException.TIMED_OUT) { - // we only retry when we encounter timeout exception. - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s. timeout", - BrokerUtil.printBroker(name, address), - writeOffset, bytesRead, fileLength, - remotePath); - tryTimes++; - continue; - } - - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s. encounter TTransportException: %s", - BrokerUtil.printBroker(name, address), - writeOffset, bytesRead, fileLength, - remotePath, e.getMessage()); - LOG.warn(lastErrMsg, e); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } catch (TException e) { - lastErrMsg = String.format("failed to write via broker %s. " - + "current write offset: %d, write length: %d," - + " file length: %d, file: %s. 
encounter TException: %s", - BrokerUtil.printBroker(name, address), - writeOffset, bytesRead, fileLength, - remotePath, e.getMessage()); - LOG.warn(lastErrMsg, e); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } - - if (status.ok() && tryTimes < 3) { - // write succeed, update current write offset - writeOffset += bytesRead; - } else { - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } // end of read local file loop - } catch (FileNotFoundException e1) { - return new Status(Status.ErrCode.COMMON_ERROR, "encounter file not found exception: " + e1.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } catch (IOException e1) { - return new Status(Status.ErrCode.COMMON_ERROR, "encounter io exception: " + e1.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } finally { - // close write - Status closeStatus = operations.closeWriter(OpParams.of(client, address, fd)); - if (!closeStatus.ok()) { - LOG.warn(closeStatus.getErrMsg()); - if (status.ok()) { - // we return close write error only if no other error has been encountered. - status = closeStatus; - } - ClientPool.brokerPool.invalidateObject(address, client); - } else { - ClientPool.brokerPool.returnObject(address, client); - } - } - - if (status.ok()) { - LOG.info("finished to upload {} to remote path {}. cost: {} ms", - localPath, remotePath, (System.currentTimeMillis() - start)); - } - return status; - } - - @Override - public Status rename(String origFilePath, String destFilePath) { - long start = System.currentTimeMillis(); - // 1. get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // 2. 
rename - boolean needReturn = true; - try { - TBrokerRenamePathRequest req = new TBrokerRenamePathRequest(TBrokerVersion.VERSION_ONE, - origFilePath, destFilePath, properties); - TBrokerOperationStatus ost = client.renamePath(req); - if (ost.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to rename " + origFilePath + " to " + destFilePath + ", msg: " + ost.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } - } catch (TException e) { - needReturn = false; - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to rename " + origFilePath + " to " + destFilePath + ", msg: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - - LOG.info("finished to rename {} to {}. cost: {} ms", - origFilePath, destFilePath, (System.currentTimeMillis() - start)); - return Status.OK; - } - - @Override - public Status delete(String remotePath) { - // get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // delete - boolean needReturn = true; - try { - TBrokerDeletePathRequest req = new TBrokerDeletePathRequest(TBrokerVersion.VERSION_ONE, - remotePath, properties); - TBrokerOperationStatus opst = client.deletePath(req); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to delete remote path: " + remotePath + ". 
msg: " + opst.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } - - LOG.info("finished to delete remote path {}.", remotePath); - } catch (TException e) { - needReturn = false; - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to delete remote path: " + remotePath + ". msg: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - - return Status.OK; - } - - @Override - public Status listFiles(String remotePath, boolean recursive, List result) { - // get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.BAD_CONNECTION, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // invoke broker 'listLocatedFiles' interface - boolean needReturn = true; - try { - TBrokerListPathRequest req = new TBrokerListPathRequest(TBrokerVersion.VERSION_ONE, remotePath, - recursive, properties); - req.setOnlyFiles(true); - TBrokerListResponse response = client.listLocatedFiles(req); - TBrokerOperationStatus operationStatus = response.getOpStatus(); - if (operationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to listLocatedFiles, remote path: " + remotePath + ". 
msg: " - + operationStatus.getMessage() + ", broker: " + BrokerUtil.printBroker(name, address)); - } - List fileStatus = response.getFiles(); - for (TBrokerFileStatus tFile : fileStatus) { - org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(tFile.path); - RemoteFile file = new RemoteFile(path.getName(), path, !tFile.isDir, tFile.isDir, tFile.size, - tFile.getBlockSize(), tFile.getModificationTime(), null /* blockLocations is null*/); - result.add(file); - } - LOG.info("finished to listLocatedFiles, remote path {}. get files: {}", remotePath, result); - return Status.OK; - } catch (TException e) { - needReturn = false; - return new Status(Status.ErrCode.COMMON_ERROR, "failed to listLocatedFiles, remote path: " - + remotePath + ". msg: " + e.getMessage() + ", broker: " + BrokerUtil.printBroker(name, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - } - - public boolean isSplittable(String remotePath, String inputFormat) throws UserException { - // get a proper broker - Pair pair = getBroker(); - if (pair == null) { - throw new UserException("failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // invoke 'isSplittable' interface - boolean needReturn = true; - try { - TBrokerIsSplittableRequest req = new TBrokerIsSplittableRequest().setVersion(TBrokerVersion.VERSION_ONE) - .setPath(remotePath).setInputFormat(inputFormat).setProperties(properties); - TBrokerIsSplittableResponse response = client.isSplittable(req); - TBrokerOperationStatus operationStatus = response.getOpStatus(); - if (operationStatus.getStatusCode() != TBrokerOperationStatusCode.OK) { - throw new UserException("failed to get path isSplittable, remote path: " + remotePath + ". 
msg: " - + operationStatus.getMessage() + ", broker: " + BrokerUtil.printBroker(name, address)); - } - boolean result = response.isSplittable(); - LOG.info("finished to get path isSplittable, remote path {} with format {}, isSplittable: {}", - remotePath, inputFormat, result); - return result; - } catch (TException e) { - needReturn = false; - throw new UserException("failed to get path isSplittable, remote path: " - + remotePath + ". msg: " + e.getMessage() + ", broker: " + BrokerUtil.printBroker(name, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - } - - // List files in remotePath - @Override - public Status globList(String remotePath, List result, boolean fileNameOnly) { - // get a proper broker - Pair pair = getBroker(); - if (pair == null) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to get broker client"); - } - TPaloBrokerService.Client client = pair.first; - TNetworkAddress address = pair.second; - - // list - boolean needReturn = true; - try { - TBrokerListPathRequest req = new TBrokerListPathRequest(TBrokerVersion.VERSION_ONE, remotePath, - false /* not recursive */, properties); - req.setFileNameOnly(fileNameOnly); - TBrokerListResponse rep = client.listPath(req); - TBrokerOperationStatus opst = rep.getOpStatus(); - if (opst.getStatusCode() != TBrokerOperationStatusCode.OK) { - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to list remote path: " + remotePath + ". msg: " + opst.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } - - List fileStatus = rep.getFiles(); - for (TBrokerFileStatus tFile : fileStatus) { - RemoteFile file = new RemoteFile(tFile.path, !tFile.isDir, tFile.size, 0, tFile.getModificationTime()); - result.add(file); - } - LOG.info("finished to list remote path {}. 
get files: {}", remotePath, result); - } catch (TException e) { - needReturn = false; - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to list remote path: " + remotePath + ". msg: " + e.getMessage() - + ", broker: " + BrokerUtil.printBroker(name, address)); - } finally { - if (needReturn) { - ClientPool.brokerPool.returnObject(address, client); - } else { - ClientPool.brokerPool.invalidateObject(address, client); - } - } - - return Status.OK; - } - - @Override - public Status makeDir(String remotePath) { - return new Status(Status.ErrCode.COMMON_ERROR, "mkdir is not implemented."); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/ObjFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/ObjFileSystem.java deleted file mode 100644 index fde018b96f6dc0..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/ObjFileSystem.java +++ /dev/null @@ -1,165 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.remote; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.backup.Status; -import org.apache.doris.fsv2.obj.ObjStorage; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.file.FileVisitOption; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Comparator; - -public abstract class ObjFileSystem extends RemoteFileSystem { - private static final Logger LOG = LogManager.getLogger(ObjFileSystem.class); - - protected final ObjStorage objStorage; - - public ObjFileSystem(String name, StorageBackend.StorageType type, ObjStorage objStorage) { - super(name, type); - this.objStorage = objStorage; - } - - public ObjStorage getObjStorage() { - return objStorage; - } - - @Override - public Status exists(String remotePath) { - return objStorage.headObject(remotePath); - } - - @Override - public Status directoryExists(String dir) { - return listFiles(dir, false, new ArrayList<>()); - } - - /** - * download data from remote file and check data size with expected file size. 
- * @param remoteFilePath remote file path - * @param localFilePath local file path - * @param fileSize download data size - * @return - */ - @Override - public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { - long start = System.currentTimeMillis(); - // Write the data to a local file - File localFile = new File(localFilePath); - if (localFile.exists()) { - try { - Files.walk(Paths.get(localFilePath), FileVisitOption.FOLLOW_LINKS) - .sorted(Comparator.reverseOrder()) - .map(Path::toFile) - .forEach(File::delete); - } catch (IOException e) { - return new Status( - Status.ErrCode.COMMON_ERROR, "failed to delete exist local file: " + localFilePath); - } - } - Status st = objStorage.getObject(remoteFilePath, localFile); - if (st != Status.OK) { - return st; - } - if (localFile.length() == fileSize) { - LOG.info( - "finished to get file from {} to {} with size: {}. cost {} ms", - remoteFilePath, - localFile.toPath(), - fileSize, - (System.currentTimeMillis() - start)); - return Status.OK; - } else { - return new Status(Status.ErrCode.COMMON_ERROR, localFile.toString()); - } - } - - @Override - public Status directUpload(String content, String remoteFile) { - Status st = objStorage.putObject(remoteFile, new ByteArrayInputStream(content.getBytes()), content.length()); - if (st != Status.OK) { - return st; - } - LOG.info("upload content success."); - return Status.OK; - } - - @Override - public Status upload(String localPath, String remotePath) { - File localFile = new File(localPath); - Status st = null; - try { - st = objStorage.putObject(remotePath, new FileInputStream(localFile), localFile.length()); - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - if (st != Status.OK) { - return st; - } - LOG.info("upload file " + localPath + " success."); - return Status.OK; - } - - @Override - public Status makeDir(String remotePath) { - if (!remotePath.endsWith("/")) { - remotePath += "/"; - } - Status st = 
objStorage.putObject(remotePath, new ByteArrayInputStream(new byte[0]), 0); - if (st != Status.OK) { - return st; - } - LOG.info("makeDir success."); - return Status.OK; - } - - @Override - public Status rename(String origFilePath, String destFilePath) { - Status status = objStorage.copyObject(origFilePath, destFilePath); - if (status.ok()) { - return delete(origFilePath); - } else { - return status; - } - } - - public Status copy(String origFilePath, String destFilePath) { - return objStorage.copyObject(origFilePath, destFilePath); - } - - @Override - public Status delete(String remotePath) { - return objStorage.deleteObject(remotePath); - } - - @Override - public Status deleteDirectory(String absolutePath) { - return objStorage.deleteObjects(absolutePath); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFSPhantomManager.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFSPhantomManager.java deleted file mode 100644 index 66a17750c57ab5..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFSPhantomManager.java +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.remote; - -import org.apache.doris.common.CustomThreadFactory; - -import com.google.common.collect.Sets; -import org.apache.hadoop.fs.FileSystem; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.io.IOException; -import java.lang.ref.PhantomReference; -import java.lang.ref.Reference; -import java.lang.ref.ReferenceQueue; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; - -/** - * The RemoteFSPhantomManager class is responsible for managing the phantom references - * of RemoteFileSystem objects. It ensures that the associated FileSystem resources are - * automatically cleaned up when the RemoteFileSystem objects are garbage collected. - *

- * By utilizing a ReferenceQueue and PhantomReference, this class can monitor the lifecycle - * of RemoteFileSystem objects. When a RemoteFileSystem object is no longer in use and is - * garbage collected, its corresponding FileSystem resource is properly closed to prevent - * resource leaks. - *

- * The class provides a thread-safe mechanism to ensure that the cleanup thread is started only once. - *

- * Main functionalities include: - * - Registering phantom references of RemoteFileSystem objects. - * - Starting a periodic cleanup thread that automatically closes unused FileSystem resources. - */ -public class RemoteFSPhantomManager { - - private static final Logger LOG = LogManager.getLogger(RemoteFSPhantomManager.class); - - // Scheduled executor for periodic resource cleanup - private static ScheduledExecutorService cleanupExecutor; - - // Reference queue for monitoring RemoteFileSystem objects' phantom references - private static final ReferenceQueue referenceQueue = new ReferenceQueue<>(); - - // Map storing the phantom references and their corresponding FileSystem objects - private static final ConcurrentHashMap, FileSystem> referenceMap - = new ConcurrentHashMap<>(); - - private static final Set fsSet = Sets.newConcurrentHashSet(); - - // Flag indicating whether the cleanup thread has been started - private static final AtomicBoolean isStarted = new AtomicBoolean(false); - - /** - * Registers a phantom reference for a RemoteFileSystem object in the manager. - * If the cleanup thread has not been started, it will be started. - * - * @param remoteFileSystem the RemoteFileSystem object to be registered - */ - public static void registerPhantomReference(RemoteFileSystem remoteFileSystem) { - if (!isStarted.get()) { - start(); - isStarted.set(true); - } - if (fsSet.contains(remoteFileSystem.dfsFileSystem)) { - throw new RuntimeException("FileSystem already exists: " + remoteFileSystem.dfsFileSystem.getUri()); - } - RemoteFileSystemPhantomReference phantomReference = new RemoteFileSystemPhantomReference(remoteFileSystem, - referenceQueue); - referenceMap.put(phantomReference, remoteFileSystem.dfsFileSystem); - fsSet.add(remoteFileSystem.dfsFileSystem); - } - - /** - * Starts the cleanup thread, which periodically checks and cleans up unused FileSystem resources. - * The method uses double-checked locking to ensure thread-safe startup of the cleanup thread. 
- */ - public static void start() { - if (isStarted.compareAndSet(false, true)) { - synchronized (RemoteFSPhantomManager.class) { - LOG.info("Starting cleanup thread for RemoteFileSystem objects"); - if (cleanupExecutor == null) { - CustomThreadFactory threadFactory = new CustomThreadFactory("remote-fs-phantom-cleanup"); - cleanupExecutor = Executors.newScheduledThreadPool(1, threadFactory); - cleanupExecutor.scheduleAtFixedRate(() -> { - Reference ref; - while ((ref = referenceQueue.poll()) != null) { - RemoteFileSystemPhantomReference phantomRef = (RemoteFileSystemPhantomReference) ref; - - FileSystem fs = referenceMap.remove(phantomRef); - if (fs != null) { - try { - fs.close(); - fsSet.remove(fs); - LOG.info("Closed file system: {}", fs.getUri()); - } catch (IOException e) { - LOG.warn("Failed to close file system", e); - } - } - } - }, 0, 1, TimeUnit.MINUTES); - } - } - } - } - -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFile.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFile.java deleted file mode 100644 index 04864fdeec587c..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFile.java +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.remote; - -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.Path; - -// represent a file or a dir in remote storage -public class RemoteFile { - // Only file name, not full path - private final String name; - private final boolean isFile; - private final boolean isDirectory; - private final long size; - // Block size of underlying file system. e.g. HDFS and S3. - // A large file will split into multiple blocks. The blocks are transparent to the user. - // Default block size for HDFS 2.x is 128M. - private final long blockSize; - private long modificationTime; - private Path path; - BlockLocation[] blockLocations; - - public RemoteFile(String name, boolean isFile, long size, long blockSize) { - this(name, null, isFile, !isFile, size, blockSize, 0, null); - } - - public RemoteFile(String name, boolean isFile, long size, long blockSize, long modificationTime) { - this(name, null, isFile, !isFile, size, blockSize, modificationTime, null); - } - - public RemoteFile(Path path, boolean isDirectory, long size, long blockSize, long modificationTime, - BlockLocation[] blockLocations) { - this(path.getName(), path, !isDirectory, isDirectory, size, blockSize, modificationTime, blockLocations); - } - - public RemoteFile(String name, Path path, boolean isFile, boolean isDirectory, - long size, long blockSize, long modificationTime, BlockLocation[] blockLocations) { - Preconditions.checkState(!Strings.isNullOrEmpty(name)); - this.name = name; - this.isFile = isFile; - this.isDirectory = isDirectory; - this.size = size; - this.blockSize = blockSize; - this.modificationTime = modificationTime; - this.path = path; - this.blockLocations = blockLocations; - } - - public String getName() { - return name; - } - - public Path getPath() { - 
return path; - } - - public void setPath(Path path) { - this.path = path; - } - - public boolean isFile() { - return isFile; - } - - public boolean isDirectory() { - return isDirectory; - } - - public long getSize() { - return size; - } - - public long getBlockSize() { - return blockSize; - } - - public long getModificationTime() { - return modificationTime; - } - - public BlockLocation[] getBlockLocations() { - return blockLocations; - } - - @Override - public String toString() { - return "[name: " + name + ", is file: " + isFile + "]"; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystem.java deleted file mode 100644 index 6e50d476d8a90e..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystem.java +++ /dev/null @@ -1,141 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.remote; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.backup.Status; -import org.apache.doris.common.UserException; -import org.apache.doris.fsv2.PersistentFileSystem; - -import com.google.common.collect.ImmutableSet; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; - -import java.io.Closeable; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.ReentrantLock; - -public abstract class RemoteFileSystem extends PersistentFileSystem implements Closeable { - // this field will be visited by multi-threads, better use volatile qualifier - protected volatile org.apache.hadoop.fs.FileSystem dfsFileSystem = null; - private final ReentrantLock fsLock = new ReentrantLock(); - protected AtomicBoolean closed = new AtomicBoolean(false); - - public RemoteFileSystem(String name, StorageBackend.StorageType type) { - super(name, type); - } - - protected org.apache.hadoop.fs.FileSystem nativeFileSystem(String remotePath) throws UserException { - throw new UserException("Not support to getFileSystem."); - } - - @Override - public Status listFiles(String remotePath, boolean recursive, List result) { - try { - org.apache.hadoop.fs.FileSystem fileSystem = nativeFileSystem(remotePath); - Path locatedPath = new Path(remotePath); - RemoteIterator locatedFiles = getLocatedFiles(recursive, fileSystem, locatedPath); - while (locatedFiles.hasNext()) { - LocatedFileStatus fileStatus = locatedFiles.next(); - RemoteFile location = new RemoteFile( - fileStatus.getPath(), fileStatus.isDirectory(), fileStatus.getLen(), - fileStatus.getBlockSize(), fileStatus.getModificationTime(), 
fileStatus.getBlockLocations()); - result.add(location); - } - } catch (FileNotFoundException e) { - return new Status(Status.ErrCode.NOT_FOUND, e.getMessage()); - } catch (Exception e) { - return new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); - } - return Status.OK; - } - - protected RemoteIterator getLocatedFiles(boolean recursive, - FileSystem fileSystem, Path locatedPath) throws IOException { - return fileSystem.listFiles(locatedPath, recursive); - } - - @Override - public Status listDirectories(String remotePath, Set result) { - try { - FileSystem fileSystem = nativeFileSystem(remotePath); - FileStatus[] fileStatuses = getFileStatuses(remotePath, fileSystem); - result.addAll( - Arrays.stream(fileStatuses) - .filter(FileStatus::isDirectory) - .map(file -> file.getPath().toString() + "/") - .collect(ImmutableSet.toImmutableSet())); - } catch (Exception e) { - return new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); - } - return Status.OK; - } - - protected FileStatus[] getFileStatuses(String remotePath, FileSystem fileSystem) throws IOException { - return fileSystem.listStatus(new Path(remotePath)); - } - - @Override - public Status renameDir(String origFilePath, - String destFilePath, - Runnable runWhenPathNotExist) { - Status status = exists(destFilePath); - if (status.ok()) { - return new Status(Status.ErrCode.COMMON_ERROR, "Destination directory already exists: " + destFilePath); - } - - String targetParent = new Path(destFilePath).getParent().toString(); - status = exists(targetParent); - if (Status.ErrCode.NOT_FOUND.equals(status.getErrCode())) { - status = makeDir(targetParent); - } - if (!status.ok()) { - return new Status(Status.ErrCode.COMMON_ERROR, status.getErrMsg()); - } - - runWhenPathNotExist.run(); - - return rename(origFilePath, destFilePath); - } - - @Override - public void close() throws IOException { - fsLock.lock(); - try { - if (!closed.getAndSet(true)) { - if (dfsFileSystem != null) { - dfsFileSystem.close(); - } - } - } 
finally { - fsLock.unlock(); - } - } - - public boolean connectivityTest(List filePaths) throws UserException { - return true; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystemPhantomReference.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystemPhantomReference.java deleted file mode 100644 index bfc507572f2701..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/RemoteFileSystemPhantomReference.java +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.remote; - -import org.apache.hadoop.fs.FileSystem; - -import java.lang.ref.PhantomReference; -import java.lang.ref.ReferenceQueue; - -public class RemoteFileSystemPhantomReference extends PhantomReference { - - private FileSystem fs; - - /** - * Creates a new phantom reference that refers to the given object and - * is registered with the given queue. - * - *

It is possible to create a phantom reference with a {@code null} - * queue. Such a reference will never be enqueued. - * - * @param referent the object the new phantom reference will refer to - * @param q the queue with which the reference is to be registered, - * or {@code null} if registration is not required - */ - public RemoteFileSystemPhantomReference(RemoteFileSystem referent, ReferenceQueue q) { - super(referent, q); - this.fs = referent.dfsFileSystem; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/S3FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/S3FileSystem.java deleted file mode 100644 index b63f0effa58a77..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/S3FileSystem.java +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.remote; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.backup.Status; -import org.apache.doris.common.UserException; -import org.apache.doris.common.security.authentication.HadoopAuthenticator; -import org.apache.doris.common.util.S3URI; -import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; -import org.apache.doris.fsv2.obj.S3ObjStorage; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.fs.FileSystem; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import software.amazon.awssdk.services.s3.S3Client; - -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -public class S3FileSystem extends ObjFileSystem { - - private static final Logger LOG = LogManager.getLogger(S3FileSystem.class); - private HadoopAuthenticator authenticator = null; - private AbstractS3CompatibleProperties s3Properties; - - - public S3FileSystem(AbstractS3CompatibleProperties s3Properties) { - - super(StorageBackend.StorageType.S3.name(), StorageBackend.StorageType.S3, - new S3ObjStorage(s3Properties)); - this.s3Properties = s3Properties; - this.storageProperties = s3Properties; - initFsProperties(); - - } - - @VisibleForTesting - public S3FileSystem(S3ObjStorage storage) { - super(StorageBackend.StorageType.S3.name(), StorageBackend.StorageType.S3, storage); - initFsProperties(); - } - - private void initFsProperties() { - this.properties.putAll(storageProperties.getOrigProps()); - } - - - @Override - protected FileSystem nativeFileSystem(String remotePath) throws UserException { - throw new UserException("S3 does not support native file system"); - } - - // broker file pattern glob is too complex, so we use hadoop directly - @Override - public Status globList(String remotePath, List result, boolean fileNameOnly) { - S3ObjStorage objStorage = (S3ObjStorage) this.objStorage; - return 
objStorage.globList(remotePath, result, fileNameOnly); - } - - @Override - public boolean connectivityTest(List filePaths) throws UserException { - if (filePaths == null || filePaths.isEmpty()) { - throw new UserException("File paths cannot be null or empty for connectivity test."); - } - S3ObjStorage objStorage = (S3ObjStorage) this.objStorage; - try { - S3Client s3Client = objStorage.getClient(); - Set bucketNames = new HashSet<>(); - boolean usePathStyle = Boolean.parseBoolean(s3Properties.getUsePathStyle()); - boolean forceParsingByStandardUri = Boolean.parseBoolean(s3Properties.getForceParsingByStandardUrl()); - for (String filePath : filePaths) { - S3URI s3uri; - s3uri = S3URI.create(filePath, usePathStyle, forceParsingByStandardUri); - bucketNames.add(s3uri.getBucket()); - } - bucketNames.forEach(bucketName -> s3Client.headBucket(b -> b.bucket(bucketName))); - return true; - } catch (Exception e) { - LOG.warn("S3 connectivityTest error: {}", e.getMessage(), e); - } - return false; - } - - @VisibleForTesting - public HadoopAuthenticator getAuthenticator() { - return authenticator; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/SwitchingFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/SwitchingFileSystem.java deleted file mode 100644 index 7ef6b462831f79..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/SwitchingFileSystem.java +++ /dev/null @@ -1,132 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.remote; - -import org.apache.doris.backup.Status; -import org.apache.doris.common.util.LocationPath; -import org.apache.doris.datasource.ExternalMetaCacheMgr; -import org.apache.doris.fsv2.FileSystem; -import org.apache.doris.fsv2.FileSystemCache; - -import java.util.List; -import java.util.Map; -import java.util.Set; - -public class SwitchingFileSystem implements FileSystem { - - private final ExternalMetaCacheMgr extMetaCacheMgr; - - private final String bindBrokerName; - - private final Map properties; - - public SwitchingFileSystem(ExternalMetaCacheMgr extMetaCacheMgr, String bindBrokerName, - Map properties) { - this.extMetaCacheMgr = extMetaCacheMgr; - this.bindBrokerName = bindBrokerName; - this.properties = properties; - } - - @Override - public Map getProperties() { - return properties; - } - - @Override - public Status exists(String remotePath) { - return fileSystem(remotePath).exists(remotePath); - } - - @Override - public Status directoryExists(String dir) { - return fileSystem(dir).directoryExists(dir); - } - - @Override - public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { - return fileSystem(remoteFilePath).downloadWithFileSize(remoteFilePath, localFilePath, fileSize); - } - - @Override - public Status upload(String localPath, String remotePath) { - return fileSystem(localPath).upload(localPath, remotePath); - } - - @Override - public Status directUpload(String content, String remoteFile) { - return fileSystem(remoteFile).directUpload(content, remoteFile); 
- } - - @Override - public Status rename(String origFilePath, String destFilePath) { - return fileSystem(origFilePath).rename(origFilePath, destFilePath); - } - - @Override - public Status renameDir(String origFilePath, String destFilePath) { - return fileSystem(origFilePath).renameDir(origFilePath, destFilePath); - } - - @Override - public Status renameDir(String origFilePath, String destFilePath, Runnable runWhenPathNotExist) { - return fileSystem(origFilePath).renameDir(origFilePath, destFilePath, runWhenPathNotExist); - } - - @Override - public Status delete(String remotePath) { - return fileSystem(remotePath).delete(remotePath); - } - - @Override - public Status deleteDirectory(String absolutePath) { - return fileSystem(absolutePath).deleteDirectory(absolutePath); - } - - @Override - public Status makeDir(String remotePath) { - return fileSystem(remotePath).makeDir(remotePath); - } - - @Override - public Status listFiles(String remotePath, boolean recursive, List result) { - return fileSystem(remotePath).listFiles(remotePath, recursive, result); - } - - @Override - public Status globList(String remotePath, List result) { - return fileSystem(remotePath).globList(remotePath, result); - } - - @Override - public Status globList(String remotePath, List result, boolean fileNameOnly) { - return fileSystem(remotePath).globList(remotePath, result, fileNameOnly); - } - - @Override - public Status listDirectories(String remotePath, Set result) { - return fileSystem(remotePath).listDirectories(remotePath, result); - } - - public FileSystem fileSystem(String location) { - return extMetaCacheMgr.getFsCache().getRemoteFileSystem( - new FileSystemCache.FileSystemCacheKey( - LocationPath.getFSIdentity(location, properties, - bindBrokerName), properties, bindBrokerName)); - } -} - diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/DFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/DFSFileSystem.java deleted file mode 100644 index 
8c708399eadb64..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/DFSFileSystem.java +++ /dev/null @@ -1,500 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.remote.dfs; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.backup.Status; -import org.apache.doris.common.UserException; -import org.apache.doris.common.security.authentication.HadoopAuthenticator; -import org.apache.doris.common.util.URI; -import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; -import org.apache.doris.fs.operations.HDFSFileOperations; -import org.apache.doris.fs.operations.HDFSOpParams; -import org.apache.doris.fs.operations.OpParams; -import org.apache.doris.fsv2.remote.RemoteFSPhantomManager; -import org.apache.doris.fsv2.remote.RemoteFile; -import org.apache.doris.fsv2.remote.RemoteFileSystem; - -import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; 
-import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.file.FileVisitOption; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.Comparator; -import java.util.List; - -public class DFSFileSystem extends RemoteFileSystem { - - public static final String PROP_ALLOW_FALLBACK_TO_SIMPLE_AUTH = "ipc.client.fallback-to-simple-auth-allowed"; - private static final Logger LOG = LogManager.getLogger(DFSFileSystem.class); - private HDFSFileOperations operations = null; - private HadoopAuthenticator authenticator = null; - private HdfsCompatibleProperties hdfsProperties; - - public DFSFileSystem(HdfsCompatibleProperties hdfsProperties) { - super(StorageBackend.StorageType.HDFS.name(), StorageBackend.StorageType.HDFS); - this.properties.putAll(hdfsProperties.getOrigProps()); - this.storageProperties = hdfsProperties; - this.hdfsProperties = hdfsProperties; - } - - public DFSFileSystem(HdfsCompatibleProperties hdfsProperties, StorageBackend.StorageType storageType) { - super(storageType.name(), storageType); - this.properties.putAll(hdfsProperties.getOrigProps()); - this.hdfsProperties = hdfsProperties; - } - - @VisibleForTesting - @Override - public FileSystem nativeFileSystem(String remotePath) throws UserException { - if (closed.get()) { - throw new UserException("FileSystem is closed."); - } - if (dfsFileSystem == null) { - synchronized (this) { - if (closed.get()) { - throw new UserException("FileSystem is closed."); - } - if (dfsFileSystem == null) { - Configuration conf = hdfsProperties.getHadoopConfiguration(); - // TODO: Temporarily disable the 
HDFS file system cache to prevent instances from being closed by - // each other in V1. This line can be removed once V1 and V2 are unified. - conf.set("fs.hdfs.impl.disable.cache", "true"); - authenticator = HadoopAuthenticator.getHadoopAuthenticator(conf); - try { - dfsFileSystem = authenticator.doAs(() -> { - try { - return FileSystem.get(new Path(remotePath).toUri(), conf); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - operations = new HDFSFileOperations(dfsFileSystem); - RemoteFSPhantomManager.registerPhantomReference(this); - } catch (Exception e) { - throw new UserException("Failed to get dfs FileSystem for " + e.getMessage(), e); - } - } - } - } - return dfsFileSystem; - } - - protected RemoteIterator getLocatedFiles(boolean recursive, - FileSystem fileSystem, Path locatedPath) throws IOException { - return authenticator.doAs(() -> fileSystem.listFiles(locatedPath, recursive)); - } - - protected FileStatus[] getFileStatuses(String remotePath, FileSystem fileSystem) throws IOException { - return authenticator.doAs(() -> fileSystem.listStatus(new Path(remotePath))); - } - - public static Configuration getHdfsConf(boolean fallbackToSimpleAuth) { - Configuration hdfsConf = new HdfsConfiguration(); - if (fallbackToSimpleAuth) { - // need support fallback to simple if the cluster is a mixture of kerberos and simple auth. 
- hdfsConf.set(PROP_ALLOW_FALLBACK_TO_SIMPLE_AUTH, "true"); - } - return hdfsConf; - } - - @Override - public Status downloadWithFileSize(String remoteFilePath, String localFilePath, long fileSize) { - if (LOG.isDebugEnabled()) { - LOG.debug("download from {} to {}, file size: {}.", remoteFilePath, localFilePath, fileSize); - } - final long start = System.currentTimeMillis(); - HDFSOpParams hdfsOpParams = OpParams.of(remoteFilePath); - Status st = operations.openReader(hdfsOpParams); - if (st != Status.OK) { - return st; - } - FSDataInputStream fsDataInputStream = hdfsOpParams.fsDataInputStream(); - LOG.info("finished to open reader. download {} to {}.", remoteFilePath, localFilePath); - - // delete local file if exist - File localFile = new File(localFilePath); - if (localFile.exists()) { - try { - Files.walk(Paths.get(localFilePath), FileVisitOption.FOLLOW_LINKS).sorted(Comparator.reverseOrder()) - .map(java.nio.file.Path::toFile).forEach(File::delete); - } catch (IOException e) { - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to delete exist local file: " + localFilePath + ", msg: " + e.getMessage()); - } - } - // create local file - try { - if (!localFile.createNewFile()) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to create local file: " + localFilePath); - } - } catch (IOException e) { - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to create local file: " + localFilePath + ", msg: " + e.getMessage()); - } - - String lastErrMsg; - Status status = Status.OK; - try (BufferedOutputStream out = new BufferedOutputStream(Files.newOutputStream(localFile.toPath()))) { - final long bufSize = 1024 * 1024; // 1MB - long leftSize = fileSize; - long readOffset = 0; - while (leftSize > 0) { - long readLen = Math.min(leftSize, bufSize); - try { - ByteBuffer data = readStreamBuffer(fsDataInputStream, readOffset, readLen); - if (readLen != data.array().length) { - LOG.warn( - "the actual read length does not equal to " - + "the expected 
read length: {} vs. {}, file: {}", - data.array().length, readLen, remoteFilePath); - } - // write local file - out.write(data.array()); - readOffset += data.array().length; - leftSize -= data.array().length; - } catch (Exception e) { - lastErrMsg = String.format( - "failed to read. " + "current read offset: %d, read length: %d," - + " file size: %d, file: %s. msg: %s", - readOffset, readLen, fileSize, remoteFilePath, e.getMessage()); - LOG.warn(lastErrMsg); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - } - } catch (IOException e) { - return new Status(Status.ErrCode.COMMON_ERROR, "Got exception: " + e.getMessage()); - } finally { - Status closeStatus = operations.closeReader(OpParams.of(fsDataInputStream)); - if (!closeStatus.ok()) { - LOG.warn(closeStatus.getErrMsg()); - if (status.ok()) { - // we return close write error only if no other error has been encountered. - status = closeStatus; - } - } - } - - LOG.info("finished to download from {} to {} with size: {}. cost {} ms", remoteFilePath, localFilePath, - fileSize, (System.currentTimeMillis() - start)); - return status; - } - - /** - * read data from fsDataInputStream. - * - * @param fsDataInputStream input stream for read. - * @param readOffset read offset. - * @param length read length. - * @return ByteBuffer - * @throws IOException when read data error. 
- */ - private static ByteBuffer readStreamBuffer(FSDataInputStream fsDataInputStream, long readOffset, long length) - throws IOException { - synchronized (fsDataInputStream) { - long currentStreamOffset; - try { - currentStreamOffset = fsDataInputStream.getPos(); - } catch (IOException e) { - LOG.warn("errors while get file pos from output stream", e); - throw new IOException("errors while get file pos from output stream", e); - } - if (currentStreamOffset != readOffset) { - // it's ok, when reading some format like parquet, it is not a sequential read - if (LOG.isDebugEnabled()) { - LOG.debug("invalid offset, current read offset is " + currentStreamOffset - + " is not equal to request offset " + readOffset + " seek to it"); - } - try { - fsDataInputStream.seek(readOffset); - } catch (IOException e) { - throw new IOException(String.format( - "current read offset %d is not equal to %d, and could not seek to it, msg: %s", - currentStreamOffset, readOffset, e.getMessage())); - } - } - // Avoid using the ByteBuffer based read for Hadoop because some - // FSDataInputStream - // implementations are not ByteBufferReadable, - // See https://issues.apache.org/jira/browse/HADOOP-14603 - byte[] buf; - if (length > HDFSFileOperations.READ_BUFFER_SIZE) { - buf = new byte[HDFSFileOperations.READ_BUFFER_SIZE]; - } else { - buf = new byte[(int) length]; - } - try { - int readLength = readBytesFully(fsDataInputStream, buf); - if (readLength < 0) { - throw new IOException("end of file reached"); - } - if (LOG.isDebugEnabled()) { - LOG.debug( - "read buffer from input stream, buffer size:" + buf.length + ", read length:" + readLength); - } - return ByteBuffer.wrap(buf, 0, readLength); - } catch (IOException e) { - LOG.warn("errors while read data from stream", e); - throw new IOException("errors while read data from stream " + e.getMessage()); - } - } - } - - private static int readBytesFully(FSDataInputStream is, byte[] dest) throws IOException { - int readLength = 0; - while 
(readLength < dest.length) { - int availableReadLength = dest.length - readLength; - int n = is.read(dest, readLength, availableReadLength); - if (n <= 0) { - break; - } - readLength += n; - } - return readLength; - } - - @Override - public Status exists(String remotePath) { - try { - URI pathUri = URI.create(remotePath); - Path inputFilePath = new Path(pathUri.getPath()); - FileSystem fileSystem = nativeFileSystem(remotePath); - boolean isPathExist = authenticator.doAs(() -> fileSystem.exists(inputFilePath)); - if (!isPathExist) { - return new Status(Status.ErrCode.NOT_FOUND, "remote path does not exist: " + remotePath); - } - return Status.OK; - } catch (Exception e) { - LOG.warn("errors while check path exist " + remotePath, e); - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to check remote path exist: " + remotePath + ". msg: " + e.getMessage()); - } - } - - @Override - public Status directUpload(String content, String remoteFile) { - HDFSOpParams hdfsOpParams = OpParams.of(remoteFile); - Status wst = operations.openWriter(hdfsOpParams); - if (wst != Status.OK) { - return wst; - } - FSDataOutputStream fsDataOutputStream = hdfsOpParams.fsDataOutputStream(); - LOG.info("finished to open writer. 
directly upload to remote path {}.", remoteFile); - - Status status = Status.OK; - try { - fsDataOutputStream.writeBytes(content); - } catch (IOException e) { - LOG.warn("errors while write data to output stream", e); - status = new Status(Status.ErrCode.COMMON_ERROR, "write exception: " + e.getMessage()); - } finally { - Status closeStatus = operations.closeWriter(OpParams.of(fsDataOutputStream)); - if (!closeStatus.ok()) { - LOG.warn(closeStatus.getErrMsg()); - if (status.ok()) { - status = closeStatus; - } - } - } - return status; - } - - @Override - public Status upload(String localPath, String remotePath) { - long start = System.currentTimeMillis(); - if (LOG.isDebugEnabled()) { - LOG.debug("local path {}, remote path {}", localPath, remotePath); - } - HDFSOpParams hdfsOpParams = OpParams.of(remotePath); - Status wst = operations.openWriter(hdfsOpParams); - if (wst != Status.OK) { - return wst; - } - FSDataOutputStream fsDataOutputStream = hdfsOpParams.fsDataOutputStream(); - LOG.info("finished to open writer. directly upload to remote path {}.", remotePath); - // read local file and write remote - File localFile = new File(localPath); - long fileLength = localFile.length(); - byte[] readBuf = new byte[1024]; - Status status = new Status(Status.ErrCode.OK, ""); - try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(localFile))) { - // save the last err msg - String lastErrMsg = null; - // save the current write offset of remote file - long writeOffset = 0; - // read local file, 1MB at a time - int bytesRead; - while ((bytesRead = in.read(readBuf)) != -1) { - try { - fsDataOutputStream.write(readBuf, 0, bytesRead); - } catch (IOException e) { - LOG.warn("errors while write data to output stream", e); - lastErrMsg = String.format( - "failed to write hdfs. 
current write offset: %d, write length: %d, " - + "file length: %d, file: %s, msg: errors while write data to output stream", - writeOffset, bytesRead, fileLength, remotePath); - status = new Status(Status.ErrCode.COMMON_ERROR, lastErrMsg); - break; - } - - // write succeed, update current write offset - writeOffset += bytesRead; - } // end of read local file loop - } catch (FileNotFoundException e1) { - return new Status(Status.ErrCode.COMMON_ERROR, "encounter file not found exception: " + e1.getMessage()); - } catch (IOException e1) { - return new Status(Status.ErrCode.COMMON_ERROR, "encounter io exception: " + e1.getMessage()); - } finally { - Status closeStatus = operations.closeWriter(OpParams.of(fsDataOutputStream)); - if (!closeStatus.ok()) { - LOG.warn(closeStatus.getErrMsg()); - if (status.ok()) { - // we return close write error only if no other error has been encountered. - status = closeStatus; - } - } - } - - if (status.ok()) { - LOG.info("finished to upload {} to remote path {}. 
cost: {} ms", localPath, remotePath, - (System.currentTimeMillis() - start)); - } - return status; - } - - @Override - public Status rename(String srcPath, String destPath) { - long start = System.currentTimeMillis(); - try { - URI srcPathUri = URI.create(srcPath); - URI destPathUri = URI.create(destPath); - if (!srcPathUri.getAuthority().trim().equals(destPathUri.getAuthority().trim())) { - return new Status(Status.ErrCode.COMMON_ERROR, "only allow rename in same file system"); - } - FileSystem fileSystem = nativeFileSystem(destPath); - Path srcfilePath = new Path(srcPathUri.getPath()); - Path destfilePath = new Path(destPathUri.getPath()); - boolean isRenameSuccess = authenticator.doAs(() -> fileSystem.rename(srcfilePath, destfilePath)); - if (!isRenameSuccess) { - return new Status(Status.ErrCode.COMMON_ERROR, "failed to rename " + srcPath + " to " + destPath); - } - } catch (UserException e) { - return new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); - } catch (IOException e) { - LOG.warn("errors while rename path from " + srcPath + " to " + destPath); - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to rename remote " + srcPath + " to " + destPath + ", msg: " + e.getMessage()); - } - LOG.info("finished to rename {} to {}. 
cost: {} ms", srcPath, destPath, (System.currentTimeMillis() - start)); - return Status.OK; - } - - @Override - public Status delete(String remotePath) { - try { - URI pathUri = URI.create(remotePath); - Path inputFilePath = new Path(pathUri.getPath()); - FileSystem fileSystem = nativeFileSystem(remotePath); - authenticator.doAs(() -> fileSystem.delete(inputFilePath, true)); - } catch (UserException e) { - return new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); - } catch (IOException e) { - LOG.warn("errors while delete path " + remotePath); - return new Status(Status.ErrCode.COMMON_ERROR, - "failed to delete remote path: " + remotePath + ", msg: " + e.getMessage()); - } - LOG.info("finished to delete remote path {}.", remotePath); - return Status.OK; - } - - /** - * get files in remotePath of HDFS. - * - * @param remotePath hdfs://namenode:port/path. - * @param result files in remotePath. - * @param fileNameOnly means get file only in remotePath if true. - * @return Status.OK if success. - */ - @Override - public Status globList(String remotePath, List result, boolean fileNameOnly) { - try { - URI pathUri = URI.create(remotePath); - FileSystem fileSystem = nativeFileSystem(remotePath); - Path pathPattern = new Path(pathUri.getPath()); - FileStatus[] files = authenticator.doAs(() -> fileSystem.globStatus(pathPattern)); - if (files == null) { - LOG.info("no files in path " + remotePath); - return Status.OK; - } - for (FileStatus fileStatus : files) { - RemoteFile remoteFile = new RemoteFile( - fileNameOnly ? fileStatus.getPath().getName() : fileStatus.getPath().toString(), - !fileStatus.isDirectory(), fileStatus.isDirectory() ? 
-1 : fileStatus.getLen(), - fileStatus.getBlockSize(), fileStatus.getModificationTime()); - result.add(remoteFile); - } - } catch (FileNotFoundException e) { - LOG.info("file not found: " + e.getMessage()); - return new Status(Status.ErrCode.NOT_FOUND, "file not found: " + e.getMessage()); - } catch (Exception e) { - LOG.warn("errors while get file status ", e); - return new Status(Status.ErrCode.COMMON_ERROR, "errors while get file status " + e.getMessage()); - } - LOG.info("finish list path {}", remotePath); - return Status.OK; - } - - @Override - public Status makeDir(String remotePath) { - try { - FileSystem fileSystem = nativeFileSystem(remotePath); - if (!authenticator.doAs(() -> fileSystem.mkdirs(new Path(remotePath)))) { - LOG.warn("failed to make dir for " + remotePath); - return new Status(Status.ErrCode.COMMON_ERROR, "failed to make dir for " + remotePath); - } - } catch (Exception e) { - LOG.warn("failed to make dir for " + remotePath); - return new Status(Status.ErrCode.COMMON_ERROR, e.getMessage()); - } - return Status.OK; - } - - @VisibleForTesting - public HadoopAuthenticator getAuthenticator() { - return authenticator; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/JFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/JFSFileSystem.java deleted file mode 100644 index 68bef7340cc2e1..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/JFSFileSystem.java +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.remote.dfs; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; - -public class JFSFileSystem extends DFSFileSystem { - public JFSFileSystem(HdfsCompatibleProperties hdfsProperties) { - super(hdfsProperties, StorageBackend.StorageType.JFS); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/OFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/OFSFileSystem.java deleted file mode 100644 index a86bfd038e62eb..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/fsv2/remote/dfs/OFSFileSystem.java +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.fsv2.remote.dfs; - -import org.apache.doris.analysis.StorageBackend; -import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; - -public class OFSFileSystem extends DFSFileSystem { - public OFSFileSystem(HdfsCompatibleProperties properties) { - super(properties, StorageBackend.StorageType.OFS); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 74174a5105cfe1..fc57a62ef887ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -71,9 +71,9 @@ import org.apache.doris.datasource.paimon.source.PaimonScanNode; import org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalTable; import org.apache.doris.datasource.trinoconnector.source.TrinoConnectorScanNode; -import org.apache.doris.fsv2.DirectoryLister; -import org.apache.doris.fsv2.FileSystemDirectoryLister; -import org.apache.doris.fsv2.TransactionScopeCachingDirectoryListerFactory; +import org.apache.doris.fs.DirectoryLister; +import org.apache.doris.fs.FileSystemDirectoryLister; +import org.apache.doris.fs.TransactionScopeCachingDirectoryListerFactory; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.properties.DistributionSpec; import org.apache.doris.nereids.properties.DistributionSpecAllSingleton; diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java index 5839643d770a46..65f0c2bd5e3cb3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java @@ 
-19,7 +19,7 @@ import org.apache.doris.datasource.hive.HMSTransaction; import org.apache.doris.datasource.hive.HiveMetadataOps; -import org.apache.doris.fsv2.FileSystemProvider; +import org.apache.doris.fs.FileSystemProvider; import java.util.concurrent.Executor; diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionManagerFactory.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionManagerFactory.java index fe6699626a7424..b8898d9b279e32 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionManagerFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionManagerFactory.java @@ -19,7 +19,7 @@ import org.apache.doris.datasource.hive.HiveMetadataOps; import org.apache.doris.datasource.iceberg.IcebergMetadataOps; -import org.apache.doris.fsv2.FileSystemProvider; +import org.apache.doris.fs.FileSystemProvider; import java.util.concurrent.Executor; diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java index a8c2960868adde..935c560178b9e7 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/BackupJobTest.java @@ -32,7 +32,7 @@ import org.apache.doris.common.jmockit.Deencapsulation; import org.apache.doris.common.util.UnitTestUtil; import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.fsv2.FileSystemFactory; +import org.apache.doris.fs.FileSystemFactory; import org.apache.doris.persist.EditLog; import org.apache.doris.task.AgentBatchTask; import org.apache.doris.task.AgentTask; @@ -127,7 +127,7 @@ public Repository getRepo(long repoId) { private EditLog editLog; private Repository repo = new Repository(repoId, "repo", false, "my_repo", - FileSystemFactory.get("broker", Maps.newHashMap()), null); + FileSystemFactory.get("broker", Maps.newHashMap())); @BeforeClass public static void 
start() { diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java index c8fec2b64033f6..9d15be43a8a7ed 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RepositoryTest.java @@ -22,9 +22,9 @@ import org.apache.doris.catalog.FsBroker; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; -import org.apache.doris.fsv2.FileSystemFactory; -import org.apache.doris.fsv2.remote.RemoteFile; -import org.apache.doris.fsv2.remote.RemoteFileSystem; +import org.apache.doris.fs.FileSystemFactory; +import org.apache.doris.fs.remote.RemoteFile; +import org.apache.doris.fs.remote.RemoteFileSystem; import org.apache.doris.service.FrontendOptions; import com.google.common.collect.Lists; @@ -100,7 +100,7 @@ public FsBroker getBroker(String name, String host) throws AnalysisException { @Test public void testGet() { - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); Assert.assertEquals(repoId, repo.getId()); Assert.assertEquals(name, repo.getName()); @@ -128,7 +128,7 @@ public Status list(String remotePath, List result) { } }; - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); Status st = repo.initRepository(); System.out.println(st); @@ -137,7 +137,7 @@ public Status list(String remotePath, List result) { @Test public void testassemnblePath() throws MalformedURLException, URISyntaxException { - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); // job info String label = "label"; @@ -178,7 +178,7 @@ public void testPing() { } }; - repo = new Repository(10000, "repo", false, location, 
fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); Assert.assertTrue(repo.ping()); Assert.assertTrue(repo.getErrorMsg() == null); } @@ -199,7 +199,7 @@ public Status list(String remotePath, List result) { } }; - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); List snapshotNames = Lists.newArrayList(); Status st = repo.listSnapshots(snapshotNames); Assert.assertTrue(st.ok()); @@ -225,7 +225,7 @@ public void testUpload() { } }; - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); String localFilePath = "./tmp_" + System.currentTimeMillis(); try (PrintWriter out = new PrintWriter(localFilePath)) { out.print("a"); @@ -272,7 +272,7 @@ public Status list(String remotePath, List result) { } }; - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); String remoteFilePath = location + "/remote_file"; Status st = repo.download(remoteFilePath, localFilePath); Assert.assertTrue(st.ok()); @@ -283,7 +283,7 @@ public Status list(String remotePath, List result) { @Test public void testGetInfo() { - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); List infos = repo.getInfo(); Assert.assertTrue(infos.size() == ShowRepositoriesStmt.TITLE_NAMES.size()); } @@ -311,7 +311,7 @@ public Status list(String remotePath, List result) { } }; - repo = new Repository(10000, "repo", false, location, fileSystem, null); + repo = new Repository(10000, "repo", false, location, fileSystem); String snapshotName = ""; String timestamp = ""; try { @@ -332,7 +332,7 @@ public void testPersist() throws UserException { properties.put("bos_accesskey", "a"); properties.put("bos_secret_accesskey", "b"); 
RemoteFileSystem fs = FileSystemFactory.get(properties); - repo = new Repository(10000, "repo", false, location, fs, null); + repo = new Repository(10000, "repo", false, location, fs); File file = new File("./Repository"); try { @@ -362,7 +362,7 @@ public void testPersist() throws UserException { public void testPathNormalize() { String newLoc = "bos://cmy_bucket/bos_repo/"; - repo = new Repository(10000, "repo", false, newLoc, fileSystem, null); + repo = new Repository(10000, "repo", false, newLoc, fileSystem); String path = repo.getRepoPath("label1", "/_ss_my_ss/_ss_content/__db_10000/"); Assert.assertEquals("bos://cmy_bucket/bos_repo/__palo_repository_repo/__ss_label1/__ss_content/_ss_my_ss/_ss_content/__db_10000/", path); @@ -370,7 +370,7 @@ public void testPathNormalize() { Assert.assertEquals("bos://cmy_bucket/bos_repo/__palo_repository_repo/__ss_label1/__ss_content/_ss_my_ss/_ss_content/__db_10000", path); newLoc = "hdfs://path/to/repo"; - repo = new Repository(10000, "repo", false, newLoc, fileSystem, null); + repo = new Repository(10000, "repo", false, newLoc, fileSystem); SnapshotInfo snapshotInfo = new SnapshotInfo(1, 2, 3, 4, 5, 6, 7, "/path", Lists.newArrayList()); path = repo.getRepoTabletPathBySnapshotInfo("label1", snapshotInfo); Assert.assertEquals("hdfs://path/to/repo/__palo_repository_repo/__ss_label1/__ss_content/__db_1/__tbl_2/__part_3/__idx_4/__5", path); diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java index db3c7944dd91d3..696f669b9c223b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -40,7 +40,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.jmockit.Deencapsulation; import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.fsv2.FileSystemFactory; +import 
org.apache.doris.fs.FileSystemFactory; import org.apache.doris.persist.EditLog; import org.apache.doris.resource.Tag; import org.apache.doris.system.SystemInfoService; @@ -125,7 +125,7 @@ public Repository getRepo(long repoId) { @Injectable private Repository repo = new Repository(repoId, "repo", false, "bos://my_repo", - FileSystemFactory.get("broker", Maps.newHashMap()), null); + FileSystemFactory.get("broker", Maps.newHashMap())); private BackupMeta backupMeta; diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java index 034b5479758c3d..e2d7996760a57b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java @@ -20,7 +20,7 @@ import org.apache.doris.catalog.HdfsResource; import org.apache.doris.common.util.LocationPath.Scheme; import org.apache.doris.datasource.property.constants.OssProperties; -import org.apache.doris.fsv2.FileSystemType; +import org.apache.doris.fs.FileSystemType; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HiveAcidTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HiveAcidTest.java index dd97addf2b952d..a54084e9b45b29 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HiveAcidTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HiveAcidTest.java @@ -19,7 +19,7 @@ import org.apache.doris.common.info.SimpleTableInfo; import org.apache.doris.datasource.hive.HiveMetaStoreCache.FileCacheValue; -import org.apache.doris.fsv2.LocalDfsFileSystem; +import org.apache.doris.fs.LocalDfsFileSystem; import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidReaderWriteIdList; diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java index 217057c91238fd..61b373706d9f75 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java @@ -24,10 +24,10 @@ import org.apache.doris.common.info.SimpleTableInfo; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.datasource.TestHMSCachedClient; -import org.apache.doris.fsv2.FileSystem; -import org.apache.doris.fsv2.FileSystemProvider; -import org.apache.doris.fsv2.LocalDfsFileSystem; -import org.apache.doris.fsv2.remote.SwitchingFileSystem; +import org.apache.doris.fs.FileSystem; +import org.apache.doris.fs.FileSystemProvider; +import org.apache.doris.fs.LocalDfsFileSystem; +import org.apache.doris.fs.remote.SwitchingFileSystem; import org.apache.doris.nereids.trees.plans.commands.insert.HiveInsertCommandContext; import org.apache.doris.qe.ConnectContext; import org.apache.doris.thrift.THiveLocationParams; diff --git a/fe/fe-core/src/test/java/org/apache/doris/external/iceberg/IcebergHadoopCatalogTest.java b/fe/fe-core/src/test/java/org/apache/doris/external/iceberg/IcebergHadoopCatalogTest.java index e54c0298190215..1ae9ee7e50ceff 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/external/iceberg/IcebergHadoopCatalogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/external/iceberg/IcebergHadoopCatalogTest.java @@ -17,7 +17,6 @@ package org.apache.doris.external.iceberg; -import org.apache.doris.analysis.StorageBackend; import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.PropertyConverter; import org.apache.doris.fs.FileSystemFactory; @@ -51,7 +50,7 @@ public void testHadoopCatalogListNamespaces() throws UserException, IOException properties.put("cos.region", "ap-beijing"); Map hadoopProps = 
PropertyConverter.convertToHadoopFSProperties(properties); String pathStr = "cosn://bucket1/namespace"; - DFSFileSystem fs = (DFSFileSystem) FileSystemFactory.get("", StorageBackend.StorageType.HDFS, hadoopProps); + DFSFileSystem fs = (DFSFileSystem) FileSystemFactory.get(hadoopProps); nativeFs = fs.nativeFileSystem(pathStr); RemoteIterator it = nativeFs.listStatusIterator(new Path(pathStr)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java index f8869db9cf2b5b..3f6ba202ed885b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java @@ -19,6 +19,8 @@ import org.apache.doris.backup.Status; import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.datasource.property.storage.AzureProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.remote.RemoteFile; import com.azure.core.http.HttpHeaders; @@ -92,12 +94,13 @@ public void testGlobList() { Map props = new HashMap(); props.put(S3Properties.ACCESS_KEY, "${account_name}"); props.put(S3Properties.SECRET_KEY, "${key}"); - props.put(S3Properties.ENDPOINT, "https://blob.azure.windows.net"); + props.put(S3Properties.ENDPOINT, "https://bucket.blob.core.windows.net"); props.put(S3Properties.BUCKET, "${container}"); List inputs = genInputs(); inputs.stream().forEach(i -> { - AzureObjStorage azs = new AzureObjStorage(props); + AzureProperties azureProps = (AzureProperties) StorageProperties.createPrimary(props); + AzureObjStorage azs = new AzureObjStorage(azureProps); List result = new ArrayList(); boolean fileNameOnly = false; // FIXME(gavin): Mock the result returned from azure blob to make this UT work when no aksk and network @@ -112,13 +115,13 @@ public void testGlobListWithMockedAzureStorage() { Map props = new 
HashMap(); props.put(S3Properties.ACCESS_KEY, "gavintestmocked"); props.put(S3Properties.SECRET_KEY, "sksks"); - props.put(S3Properties.ENDPOINT, "https://blob.azure.windows.net"); + props.put(S3Properties.ENDPOINT, "https://bucket.blob.core.windows.net"); props.put(S3Properties.BUCKET, "gavin-test-mocked"); List inputs = genInputs(); inputs.stream().forEach(i -> { AzureObjStorage azs = genMockedAzureObjStorage(4/*numBatches, numContinuations*/); - List result = new ArrayList(); + List result = new ArrayList<>(); boolean fileNameOnly = false; // FIXME(gavin): Mock the result returned from azure blob to make this UT work when no aksk and network Status st = azs.globList(i.pattern, result, fileNameOnly); @@ -197,9 +200,10 @@ public static AzureObjStorage genMockedAzureObjStorage(int numBatch) { Map props = new HashMap(); props.put(S3Properties.ACCESS_KEY, "gavintestus"); props.put(S3Properties.SECRET_KEY, "sksksksksksksk"); - props.put(S3Properties.ENDPOINT, "https://blob.azure.windows.net"); + props.put(S3Properties.ENDPOINT, "https://blobz.blob.core.windows.net"); props.put(S3Properties.BUCKET, "gavin-test-us"); - AzureObjStorage azs = new AzureObjStorage(props); + AzureProperties azureProps = (AzureProperties) StorageProperties.createPrimary(props); + AzureObjStorage azs = new AzureObjStorage(azureProps); List allBlobKeys = genObjectKeys(); final Integer[] batchIndex = {0}; // from 0 to numBatch new MockUp(AzureObjStorage.class) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/fsv2/obj/S3FileSystemTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3FileSystemTest.java similarity index 97% rename from fe/fe-core/src/test/java/org/apache/doris/fsv2/obj/S3FileSystemTest.java rename to fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3FileSystemTest.java index b36d0b4aba46b9..83fd3598360a42 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fsv2/obj/S3FileSystemTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3FileSystemTest.java @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -package org.apache.doris.fsv2.obj; +package org.apache.doris.fs.obj; import org.apache.doris.backup.Repository; import org.apache.doris.backup.Status; @@ -24,10 +24,9 @@ import org.apache.doris.datasource.property.PropertyConverter; import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; import org.apache.doris.datasource.property.storage.StorageProperties; -import org.apache.doris.fs.obj.MockedS3Client; -import org.apache.doris.fsv2.FileSystemFactory; -import org.apache.doris.fsv2.remote.RemoteFile; -import org.apache.doris.fsv2.remote.S3FileSystem; +import org.apache.doris.fs.FileSystemFactory; +import org.apache.doris.fs.remote.RemoteFile; +import org.apache.doris.fs.remote.S3FileSystem; import mockit.Mock; import mockit.MockUp; @@ -171,8 +170,7 @@ public void upload() throws IOException { @Test public void testRepositoryUpload() throws IOException { - Repository repo = new Repository(10000, "repo", false, bucket + basePath, fileSystem, - null); + Repository repo = new Repository(10000, "repo", false, bucket + basePath, fileSystem); File localFile = File.createTempFile("s3unittest", ".dat"); localFile.deleteOnExit(); String remote = bucket + basePath + "/" + localFile.getName(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageGlobListTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageGlobListTest.java index 06fa47c361c62a..1401e892ba38cd 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageGlobListTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageGlobListTest.java @@ -19,6 +19,8 @@ import org.apache.doris.backup.Status; import org.apache.doris.datasource.property.constants.S3Properties; +import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; 
+import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.remote.RemoteFile; import mockit.Mock; @@ -124,6 +126,7 @@ public void testFsGlob() { public static S3ObjStorage genMockedS3ObjStorage(int numBatch) { Map props = new HashMap(); props.put(S3Properties.ROLE_ARN, "test_mocked_arn"); + props.put("s3.external_id", "12"); props.put(S3Properties.ENDPOINT, "https://s3.us-east-1.amazonaws.com"); props.put(S3Properties.BUCKET, "test_mocked_bucket"); List allObjKeys = genObjKeys(); @@ -160,7 +163,7 @@ ListObjectsV2Response listObjectsV2(ListObjectsV2Request listObjectsV2Request) { } }; - S3ObjStorage s3ObjStorage = new S3ObjStorage(props); + S3ObjStorage s3ObjStorage = new S3ObjStorage((AbstractS3CompatibleProperties) StorageProperties.createPrimary(props)); return s3ObjStorage; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java index e565120600adab..7a3d9bd506dbb4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageTest.java @@ -18,151 +18,192 @@ package org.apache.doris.fs.obj; import org.apache.doris.backup.Status; +import org.apache.doris.common.DdlException; import org.apache.doris.common.UserException; +import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; -import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; +import org.mockito.Mockito; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import 
software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import software.amazon.awssdk.services.s3.model.UploadPartResponse; import java.io.ByteArrayInputStream; -import java.io.File; -import java.lang.reflect.Field; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.io.InputStream; + +public class S3ObjStorageTest { + private S3ObjStorage storage; + private S3Client mockClient; + private AbstractS3CompatibleProperties mockProperties; + + @BeforeEach + void setUp() throws UserException { + mockProperties = Mockito.mock(AbstractS3CompatibleProperties.class); + Mockito.when(mockProperties.getEndpoint()).thenReturn("http://s3.example.com"); + Mockito.when(mockProperties.getRegion()).thenReturn("us-east-1"); + Mockito.when(mockProperties.getUsePathStyle()).thenReturn("false"); + Mockito.when(mockProperties.getForceParsingByStandardUrl()).thenReturn("false"); + // storage = new S3ObjStorage(mockProperties); + mockClient = Mockito.mock(S3Client.class); + storage = Mockito.spy(new S3ObjStorage(mockProperties)); + 
Mockito.doReturn(mockClient).when(storage).getClient(); + } + + @Test + @DisplayName("getClient should return a valid S3Client instance") + void getClientReturnsValidS3Client() throws UserException { + S3Client client = storage.getClient(); + Assertions.assertNotNull(client); + } + + @Test + @DisplayName("headObject should return OK status when object exists") + void headObjectReturnsOkWhenObjectExists() throws UserException { + Mockito.when(mockClient.headObject(Mockito.any(HeadObjectRequest.class))) + .thenReturn(HeadObjectResponse.builder().build()); + + Status status = storage.headObject("s3://bucket/key"); + Assertions.assertEquals(Status.OK, status); + } + + @Test + @DisplayName("headObject should return NOT_FOUND status when object does not exist") + void headObjectReturnsNotFoundWhenObjectDoesNotExist() throws UserException { + Mockito.when(mockClient.headObject(Mockito.any(HeadObjectRequest.class))) + .thenThrow(S3Exception.builder().statusCode(404).build()); + + Status status = storage.headObject("s3://bucket/nonexistent-key"); + Assertions.assertEquals(Status.ErrCode.NOT_FOUND, status.getErrCode()); + } + + @Test + @DisplayName("headObject should return COMMON_ERROR status for other exceptions") + void headObjectReturnsErrorForOtherExceptions() throws UserException { + Mockito.when(mockClient.headObject(Mockito.any(HeadObjectRequest.class))) + .thenThrow(S3Exception.builder().statusCode(500).build()); + + Status status = storage.headObject("s3://bucket/key"); + Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); + } + + @Test + @DisplayName("putObject should return OK status when upload succeeds") + void putObjectReturnsOkWhenUploadSucceeds() throws UserException { + Mockito.when(mockClient.putObject(Mockito.any(PutObjectRequest.class), Mockito.any(RequestBody.class))) + .thenReturn(PutObjectResponse.builder().build()); + + InputStream content = new ByteArrayInputStream("test content".getBytes()); + Status status = 
storage.putObject("s3://bucket/key", content, 12); + Assertions.assertEquals(Status.OK, status); + } + + @Test + @DisplayName("putObject should return COMMON_ERROR status when upload fails") + void putObjectReturnsErrorWhenUploadFails() throws UserException { + Mockito.when(mockClient.putObject(Mockito.any(PutObjectRequest.class), Mockito.any(RequestBody.class))) + .thenThrow(S3Exception.builder().statusCode(500).build()); + + InputStream content = new ByteArrayInputStream("test content".getBytes()); + Status status = storage.putObject("s3://bucket/key", content, 12); + Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); + } + + @Test + @DisplayName("deleteObject should return OK status when object is deleted successfully") + void deleteObjectReturnsOkWhenDeletionSucceeds() throws UserException { + Mockito.when(mockClient.deleteObject(Mockito.any(DeleteObjectRequest.class))) + .thenReturn(DeleteObjectResponse.builder().build()); + + Status status = storage.deleteObject("s3://bucket/key"); + Assertions.assertEquals(Status.OK, status); + } + + @Test + @DisplayName("deleteObject should return OK status when object does not exist") + void deleteObjectReturnsOkWhenObjectDoesNotExist() throws UserException { + Mockito.when(mockClient.deleteObject(Mockito.any(DeleteObjectRequest.class))) + .thenThrow(S3Exception.builder().statusCode(404).build()); + + Status status = storage.deleteObject("s3://bucket/nonexistent-key"); + Assertions.assertEquals(Status.OK, status); + } + @Test + @DisplayName("deleteObject should return COMMON_ERROR status for other exceptions") + void deleteObjectReturnsErrorForOtherExceptions() throws UserException { + Mockito.when(mockClient.deleteObject(Mockito.any(DeleteObjectRequest.class))) + .thenThrow(S3Exception.builder().statusCode(500).build()); + + Status status = storage.deleteObject("s3://bucket/key"); + Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); + } + + @Test + @DisplayName("listObjects 
should return a list of objects when objects exist") + void listObjectsReturnsObjectsWhenObjectsExist() throws UserException { + ListObjectsV2Response response = ListObjectsV2Response.builder() + .contents(S3Object.builder().key("prefix/key1").size(100L).build(), + S3Object.builder().key("prefix/key2").size(200L).build()) + .isTruncated(false) + .build(); + Mockito.when(mockClient.listObjectsV2(Mockito.any(ListObjectsV2Request.class))).thenReturn(response); + + RemoteObjects objects = storage.listObjects("s3://bucket/prefix", null); + Assertions.assertEquals(2, objects.getObjectList().size()); + } + + @Test + @DisplayName("listObjects should throw DdlException for errors") + void listObjectsThrowsExceptionForErrors() throws UserException { + Mockito.when(mockClient.listObjectsV2(Mockito.any(ListObjectsV2Request.class))) + .thenThrow(S3Exception.builder().statusCode(500).build()); + + Assertions.assertThrows(DdlException.class, () -> storage.listObjects("s3://bucket/prefix", null)); + } -@TestInstance(TestInstance.Lifecycle.PER_CLASS) -class S3ObjStorageTest { @Test - public void testS3BaseOp() throws UserException { - String ak = System.getenv("S3_ACCESS_KEY"); - String sk = System.getenv("S3_SECRET_KEY"); - String endpoint = System.getenv("S3_ENDPOINT"); - String region = System.getenv("S3_REGION"); - String bucket = System.getenv("S3_BUCKET"); - String prefix = System.getenv("S3_PREFIX"); - - // Skip this test if ENV variables are not set. 
- if (StringUtils.isEmpty(endpoint) || StringUtils.isEmpty(ak) - || StringUtils.isEmpty(sk) || StringUtils.isEmpty(region) - || StringUtils.isEmpty(bucket) || StringUtils.isEmpty(prefix)) { - return; - } - - Map properties = new HashMap<>(); - properties.put("s3.endpoint", endpoint); - properties.put("s3.access_key", ak); - properties.put("s3.secret_key", sk); - properties.put("s3.region", region); - S3ObjStorage storage = new S3ObjStorage(properties); - - String baseUrl = "s3://" + bucket + "/" + prefix + "/"; - String content = "mocked"; - for (int i = 0; i < 5; ++i) { - Status st = storage.putObject(baseUrl + "key" + i, - new ByteArrayInputStream(content.getBytes()), content.length()); - Assertions.assertEquals(Status.OK, st); - } - - RemoteObjects remoteObjects = storage.listObjects(baseUrl, null); - Assertions.assertEquals(5, remoteObjects.getObjectList().size()); - Assertions.assertFalse(remoteObjects.isTruncated()); - Assertions.assertEquals(null, remoteObjects.getContinuationToken()); - - List objectList = remoteObjects.getObjectList(); - for (int i = 0; i < objectList.size(); i++) { - RemoteObject remoteObject = objectList.get(i); - Assertions.assertEquals("key" + i, remoteObject.getRelativePath()); - } - - Status st = storage.headObject(baseUrl + "key" + 0); - Assertions.assertEquals(Status.OK, st); - - File file = new File("test-file.txt"); - file.delete(); - st = storage.getObject(baseUrl + "key" + 0, file); - Assertions.assertEquals(Status.OK, st); - - st = storage.deleteObject(baseUrl + "key" + 0); - Assertions.assertEquals(Status.OK, st); - - file.delete(); - st = storage.getObject(baseUrl + "key" + 0, file); - Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, st.getErrCode()); - Assertions.assertTrue(st.getErrMsg().contains("The specified key does not exist")); - file.delete(); - - st = storage.deleteObjects(baseUrl); - Assertions.assertEquals(Status.OK, st); - - remoteObjects = storage.listObjects(baseUrl, null); - Assertions.assertEquals(0, 
remoteObjects.getObjectList().size()); - Assertions.assertFalse(remoteObjects.isTruncated()); - Assertions.assertEquals(null, remoteObjects.getContinuationToken()); + @DisplayName("multipartUpload should return OK status when upload succeeds") + void multipartUploadReturnsOkWhenUploadSucceeds() throws Exception { + Mockito.when(mockClient.createMultipartUpload(Mockito.any(CreateMultipartUploadRequest.class))) + .thenReturn(CreateMultipartUploadResponse.builder().uploadId("uploadId").build()); + Mockito.when(mockClient.uploadPart(Mockito.any(UploadPartRequest.class), Mockito.any(RequestBody.class))) + .thenReturn(UploadPartResponse.builder().eTag("etag").build()); + Mockito.when(mockClient.completeMultipartUpload(Mockito.any(CompleteMultipartUploadRequest.class))) + .thenReturn(CompleteMultipartUploadResponse.builder().build()); + + InputStream content = new ByteArrayInputStream(new byte[10 * 1024 * 1024]); // 10 MB + Status status = storage.multipartUpload("s3://bucket/key", content, 10 * 1024 * 1024); + Assertions.assertEquals(Status.OK, status); } @Test - public void testBaseOp() throws Exception { - Map properties = new HashMap<>(); - properties.put("s3.endpoint", "s3.e.c"); - properties.put("s3.access_key", "abc"); - properties.put("s3.secret_key", "123"); - S3ObjStorage storage = new S3ObjStorage(properties); - Field client = storage.getClass().getDeclaredField("client"); - client.setAccessible(true); - MockedS3Client mockedClient = new MockedS3Client(); - client.set(storage, mockedClient); - Assertions.assertTrue(storage.getClient() instanceof MockedS3Client); - - Status st = storage.headObject("s3://bucket/key"); - Assertions.assertEquals(Status.OK, st); - - mockedClient.setMockedData(new byte[0]); - st = storage.getObject("s3://bucket/key", new File("/mocked/file")); - Assertions.assertEquals(Status.OK, st); - - String content = "mocked"; - for (int i = 0; i < 5; i++) { - st = storage.putObject("s3://bucket/keys/key" + i, - new 
ByteArrayInputStream(content.getBytes()), content.length()); - Assertions.assertEquals(Status.OK, st); - } - st = storage.copyObject("s3://bucket/key", "s3://bucket/key1"); - Assertions.assertEquals(Status.OK, st); - - st = storage.deleteObject("s3://bucket/key"); - Assertions.assertEquals(Status.OK, st); - - RemoteObjects remoteObjects = storage.listObjects("s3://bucket/keys", null); - Assertions.assertEquals(5, remoteObjects.getObjectList().size()); - Assertions.assertTrue(remoteObjects.isTruncated()); - Assertions.assertEquals("next-token", remoteObjects.getContinuationToken()); - - List objectList = remoteObjects.getObjectList(); - for (int i = 0; i < objectList.size(); i++) { - RemoteObject remoteObject = objectList.get(i); - Assertions.assertEquals("key" + i, remoteObject.getRelativePath()); - } - - storage.properties.put("use_path_style", "false"); - storage.properties.put("s3.endpoint", "oss.a.c"); - storage.setProperties(storage.properties); - RemoteObjects remoteObjectsVBucket = storage.listObjects("oss://bucket/keys", null); - List list = remoteObjectsVBucket.getObjectList(); - for (int i = 0; i < list.size(); i++) { - RemoteObject remoteObject = list.get(i); - Assertions.assertTrue(remoteObject.getRelativePath().startsWith("key" + i)); - } - - storage.properties.put("use_path_style", "true"); - storage.setProperties(storage.properties); - remoteObjectsVBucket = storage.listObjects("oss://bucket/keys", null); - list = remoteObjectsVBucket.getObjectList(); - for (int i = 0; i < list.size(); i++) { - RemoteObject remoteObject = list.get(i); - Assertions.assertTrue(remoteObject.getRelativePath().startsWith("key" + i)); - } + @DisplayName("multipartUpload should return COMMON_ERROR status when upload fails") + void multipartUploadReturnsErrorWhenUploadFails() throws Exception { + Mockito.when(mockClient.createMultipartUpload(Mockito.any(CreateMultipartUploadRequest.class))) + .thenReturn(CreateMultipartUploadResponse.builder().uploadId("uploadId").build()); 
+ Mockito.when(mockClient.uploadPart(Mockito.any(UploadPartRequest.class), Mockito.any(RequestBody.class))) + .thenThrow(S3Exception.builder().statusCode(500).build()); + + InputStream content = new ByteArrayInputStream(new byte[10 * 1024 * 1024]); // 10 MB + Status status = storage.multipartUpload("s3://bucket/key", content, 10 * 1024 * 1024); + Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/fsv2/remote/RemoteFileSystemTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/remote/RemoteFileSystemTest.java similarity index 99% rename from fe/fe-core/src/test/java/org/apache/doris/fsv2/remote/RemoteFileSystemTest.java rename to fe/fe-core/src/test/java/org/apache/doris/fs/remote/RemoteFileSystemTest.java index df5225ff3c0557..9ad929edb224d8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fsv2/remote/RemoteFileSystemTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/fs/remote/RemoteFileSystemTest.java @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -package org.apache.doris.fsv2.remote; +package org.apache.doris.fs.remote; import org.apache.doris.analysis.StorageBackend; import org.apache.doris.backup.Status; diff --git a/fe/fe-core/src/test/java/org/apache/doris/fsv2/obj/S3ObjStorageTest.java b/fe/fe-core/src/test/java/org/apache/doris/fsv2/obj/S3ObjStorageTest.java deleted file mode 100644 index f655a9d5654c9d..00000000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/fsv2/obj/S3ObjStorageTest.java +++ /dev/null @@ -1,209 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.fsv2.obj; - -import org.apache.doris.backup.Status; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.UserException; -import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; -import org.mockito.Mockito; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; -import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; -import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; -import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.model.S3Exception; -import software.amazon.awssdk.services.s3.model.S3Object; -import 
software.amazon.awssdk.services.s3.model.UploadPartRequest; -import software.amazon.awssdk.services.s3.model.UploadPartResponse; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; - -public class S3ObjStorageTest { - private S3ObjStorage storage; - private S3Client mockClient; - private AbstractS3CompatibleProperties mockProperties; - - @BeforeEach - void setUp() throws UserException { - mockProperties = Mockito.mock(AbstractS3CompatibleProperties.class); - Mockito.when(mockProperties.getEndpoint()).thenReturn("http://s3.example.com"); - Mockito.when(mockProperties.getRegion()).thenReturn("us-east-1"); - Mockito.when(mockProperties.getUsePathStyle()).thenReturn("false"); - Mockito.when(mockProperties.getForceParsingByStandardUrl()).thenReturn("false"); - // storage = new S3ObjStorage(mockProperties); - mockClient = Mockito.mock(S3Client.class); - storage = Mockito.spy(new S3ObjStorage(mockProperties)); - Mockito.doReturn(mockClient).when(storage).getClient(); - } - - @Test - @DisplayName("getClient should return a valid S3Client instance") - void getClientReturnsValidS3Client() throws UserException { - S3Client client = storage.getClient(); - Assertions.assertNotNull(client); - } - - @Test - @DisplayName("headObject should return OK status when object exists") - void headObjectReturnsOkWhenObjectExists() throws UserException { - Mockito.when(mockClient.headObject(Mockito.any(HeadObjectRequest.class))) - .thenReturn(HeadObjectResponse.builder().build()); - - Status status = storage.headObject("s3://bucket/key"); - Assertions.assertEquals(Status.OK, status); - } - - @Test - @DisplayName("headObject should return NOT_FOUND status when object does not exist") - void headObjectReturnsNotFoundWhenObjectDoesNotExist() throws UserException { - Mockito.when(mockClient.headObject(Mockito.any(HeadObjectRequest.class))) - .thenThrow(S3Exception.builder().statusCode(404).build()); - - Status status = storage.headObject("s3://bucket/nonexistent-key"); - 
Assertions.assertEquals(Status.ErrCode.NOT_FOUND, status.getErrCode()); - } - - @Test - @DisplayName("headObject should return COMMON_ERROR status for other exceptions") - void headObjectReturnsErrorForOtherExceptions() throws UserException { - Mockito.when(mockClient.headObject(Mockito.any(HeadObjectRequest.class))) - .thenThrow(S3Exception.builder().statusCode(500).build()); - - Status status = storage.headObject("s3://bucket/key"); - Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); - } - - @Test - @DisplayName("putObject should return OK status when upload succeeds") - void putObjectReturnsOkWhenUploadSucceeds() throws UserException { - Mockito.when(mockClient.putObject(Mockito.any(PutObjectRequest.class), Mockito.any(RequestBody.class))) - .thenReturn(PutObjectResponse.builder().build()); - - InputStream content = new ByteArrayInputStream("test content".getBytes()); - Status status = storage.putObject("s3://bucket/key", content, 12); - Assertions.assertEquals(Status.OK, status); - } - - @Test - @DisplayName("putObject should return COMMON_ERROR status when upload fails") - void putObjectReturnsErrorWhenUploadFails() throws UserException { - Mockito.when(mockClient.putObject(Mockito.any(PutObjectRequest.class), Mockito.any(RequestBody.class))) - .thenThrow(S3Exception.builder().statusCode(500).build()); - - InputStream content = new ByteArrayInputStream("test content".getBytes()); - Status status = storage.putObject("s3://bucket/key", content, 12); - Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); - } - - @Test - @DisplayName("deleteObject should return OK status when object is deleted successfully") - void deleteObjectReturnsOkWhenDeletionSucceeds() throws UserException { - Mockito.when(mockClient.deleteObject(Mockito.any(DeleteObjectRequest.class))) - .thenReturn(DeleteObjectResponse.builder().build()); - - Status status = storage.deleteObject("s3://bucket/key"); - Assertions.assertEquals(Status.OK, status); 
- } - - @Test - @DisplayName("deleteObject should return OK status when object does not exist") - void deleteObjectReturnsOkWhenObjectDoesNotExist() throws UserException { - Mockito.when(mockClient.deleteObject(Mockito.any(DeleteObjectRequest.class))) - .thenThrow(S3Exception.builder().statusCode(404).build()); - - Status status = storage.deleteObject("s3://bucket/nonexistent-key"); - Assertions.assertEquals(Status.OK, status); - } - - @Test - @DisplayName("deleteObject should return COMMON_ERROR status for other exceptions") - void deleteObjectReturnsErrorForOtherExceptions() throws UserException { - Mockito.when(mockClient.deleteObject(Mockito.any(DeleteObjectRequest.class))) - .thenThrow(S3Exception.builder().statusCode(500).build()); - - Status status = storage.deleteObject("s3://bucket/key"); - Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); - } - - @Test - @DisplayName("listObjects should return a list of objects when objects exist") - void listObjectsReturnsObjectsWhenObjectsExist() throws UserException { - ListObjectsV2Response response = ListObjectsV2Response.builder() - .contents(S3Object.builder().key("prefix/key1").size(100L).build(), - S3Object.builder().key("prefix/key2").size(200L).build()) - .isTruncated(false) - .build(); - Mockito.when(mockClient.listObjectsV2(Mockito.any(ListObjectsV2Request.class))).thenReturn(response); - - RemoteObjects objects = storage.listObjects("s3://bucket/prefix", null); - Assertions.assertEquals(2, objects.getObjectList().size()); - } - - @Test - @DisplayName("listObjects should throw DdlException for errors") - void listObjectsThrowsExceptionForErrors() throws UserException { - Mockito.when(mockClient.listObjectsV2(Mockito.any(ListObjectsV2Request.class))) - .thenThrow(S3Exception.builder().statusCode(500).build()); - - Assertions.assertThrows(DdlException.class, () -> storage.listObjects("s3://bucket/prefix", null)); - } - - @Test - @DisplayName("multipartUpload should return OK status when 
upload succeeds") - void multipartUploadReturnsOkWhenUploadSucceeds() throws Exception { - Mockito.when(mockClient.createMultipartUpload(Mockito.any(CreateMultipartUploadRequest.class))) - .thenReturn(CreateMultipartUploadResponse.builder().uploadId("uploadId").build()); - Mockito.when(mockClient.uploadPart(Mockito.any(UploadPartRequest.class), Mockito.any(RequestBody.class))) - .thenReturn(UploadPartResponse.builder().eTag("etag").build()); - Mockito.when(mockClient.completeMultipartUpload(Mockito.any(CompleteMultipartUploadRequest.class))) - .thenReturn(CompleteMultipartUploadResponse.builder().build()); - - InputStream content = new ByteArrayInputStream(new byte[10 * 1024 * 1024]); // 10 MB - Status status = storage.multipartUpload("s3://bucket/key", content, 10 * 1024 * 1024); - Assertions.assertEquals(Status.OK, status); - } - - @Test - @DisplayName("multipartUpload should return COMMON_ERROR status when upload fails") - void multipartUploadReturnsErrorWhenUploadFails() throws Exception { - Mockito.when(mockClient.createMultipartUpload(Mockito.any(CreateMultipartUploadRequest.class))) - .thenReturn(CreateMultipartUploadResponse.builder().uploadId("uploadId").build()); - Mockito.when(mockClient.uploadPart(Mockito.any(UploadPartRequest.class), Mockito.any(RequestBody.class))) - .thenThrow(S3Exception.builder().statusCode(500).build()); - - InputStream content = new ByteArrayInputStream(new byte[10 * 1024 * 1024]); // 10 MB - Status status = storage.multipartUpload("s3://bucket/key", content, 10 * 1024 * 1024); - Assertions.assertEquals(Status.ErrCode.COMMON_ERROR, status.getErrCode()); - } -} From 67edfa512a42492327ece69e8129fa576012e838 Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Fri, 6 Jun 2025 14:00:02 +0800 Subject: [PATCH 2/7] test --- .../doris/datasource/property/PropertyConverterTest.java | 4 ++-- .../trees/plans/commands/CreateRepositoryCommandTest.java | 4 ++-- .../vault_p0/create/test_create_vault_with_kerberos.groovy | 2 +- 3 files changed, 5 
insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java index b7912e449faa10..2dffab4534122c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java @@ -182,7 +182,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { CreateRepositoryStmt analyzedStmt = createStmt(s3Repo); Assertions.assertEquals(analyzedStmt.getProperties().size(), 4); Repository repository = getRepository(analyzedStmt, "s3_repo"); - Assertions.assertEquals(4, repository.getRemoteFileSystem().getProperties().size()); + Assertions.assertEquals(5, repository.getRemoteFileSystem().getProperties().size()); String s3RepoNew = "CREATE REPOSITORY `s3_repo_new`\n" + "WITH S3\n" @@ -196,7 +196,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { CreateRepositoryStmt analyzedStmtNew = createStmt(s3RepoNew); Assertions.assertEquals(analyzedStmtNew.getProperties().size(), 3); Repository repositoryNew = getRepository(analyzedStmtNew, "s3_repo_new"); - Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(), 3); + Assertions.assertEquals(4, repositoryNew.getRemoteFileSystem().getProperties().size()); } private static Repository getRepository(CreateRepositoryStmt analyzedStmt, String name) throws DdlException { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java index 8362f5cc233027..21595cc4cce588 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java @@ -72,7 +72,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { Assertions.assertTrue(logicalPlan instanceof CreateRepositoryCommand); Assertions.assertEquals(((CreateRepositoryCommand) logicalPlan).getProperties().size(), 4); Repository repository = getRepository((CreateRepositoryCommand) logicalPlan, "s3_repo_command"); - Assertions.assertEquals(4, repository.getRemoteFileSystem().getProperties().size()); + Assertions.assertEquals(5, repository.getRemoteFileSystem().getProperties().size()); String s3RepoNew = "CREATE REPOSITORY `s3_repo_new_command`\n" + "WITH S3\n" @@ -88,7 +88,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { Assertions.assertTrue(logicalPlan1 instanceof CreateRepositoryCommand); Assertions.assertEquals(((CreateRepositoryCommand) logicalPlan1).getProperties().size(), 3); Repository repositoryNew = getRepository((CreateRepositoryCommand) logicalPlan1, "s3_repo_new_command"); - Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(), 3); + Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(), 4); } @Disabled("not support") diff --git a/regression-test/suites/vault_p0/create/test_create_vault_with_kerberos.groovy b/regression-test/suites/vault_p0/create/test_create_vault_with_kerberos.groovy index bed903cfd9086c..e774f3e3f31a9b 100644 --- a/regression-test/suites/vault_p0/create/test_create_vault_with_kerberos.groovy +++ b/regression-test/suites/vault_p0/create/test_create_vault_with_kerberos.groovy @@ -84,7 +84,7 @@ suite("test_create_vault_with_kerberos", "nonConcurrent") { "hadoop.security.authentication" = "kerberos" ); """ - }, "hadoop.kerberos.principal is required for kerberos") + }, "HDFS authentication type is kerberos, but principal or keytab is not set") sql """ From 905dcbd38ae029e4e080d115e318e79d714ff796 Mon Sep 17 00:00:00 
2001 From: Calvin Kirs Date: Fri, 6 Jun 2025 23:03:41 +0800 Subject: [PATCH 3/7] test --- .../java/org/apache/doris/fs/PersistentFileSystem.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java index f94b9a3d5c5d8c..9ffe426306da31 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java @@ -20,7 +20,6 @@ import org.apache.doris.analysis.StorageBackend; import org.apache.doris.common.io.Text; import org.apache.doris.datasource.property.storage.StorageProperties; -import org.apache.doris.persist.gson.GsonPreProcessable; import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; @@ -33,7 +32,7 @@ /** * Use for persistence, Repository will persist properties of file system. */ -public abstract class PersistentFileSystem implements FileSystem, GsonPreProcessable { +public abstract class PersistentFileSystem implements FileSystem { public static final String STORAGE_TYPE = "_DORIS_STORAGE_TYPE_"; @SerializedName("prop") public Map properties = Maps.newHashMap(); @@ -79,13 +78,7 @@ public static PersistentFileSystem read(DataInput in) throws IOException { } if (properties.containsKey(STORAGE_TYPE)) { type = StorageBackend.StorageType.valueOf(properties.get(STORAGE_TYPE)); - properties.remove(STORAGE_TYPE); } return FileSystemFactory.get(type, name, properties); } - - @Override - public void gsonPreProcess() { - properties.put(STORAGE_TYPE, type.name()); - } } From c00d552c3dbeffddcdec61e357c278a49673c16c Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Sat, 7 Jun 2025 08:10:07 +0800 Subject: [PATCH 4/7] test --- .../doris/datasource/property/PropertyConverterTest.java | 4 ++-- .../trees/plans/commands/CreateRepositoryCommandTest.java | 4 ++-- 2 files changed, 4 
insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java index 2dffab4534122c..2421e64dc95cf0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java @@ -182,7 +182,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { CreateRepositoryStmt analyzedStmt = createStmt(s3Repo); Assertions.assertEquals(analyzedStmt.getProperties().size(), 4); Repository repository = getRepository(analyzedStmt, "s3_repo"); - Assertions.assertEquals(5, repository.getRemoteFileSystem().getProperties().size()); + Assertions.assertEquals(4, repository.getRemoteFileSystem().getProperties().size()); String s3RepoNew = "CREATE REPOSITORY `s3_repo_new`\n" + "WITH S3\n" @@ -196,7 +196,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { CreateRepositoryStmt analyzedStmtNew = createStmt(s3RepoNew); Assertions.assertEquals(analyzedStmtNew.getProperties().size(), 3); Repository repositoryNew = getRepository(analyzedStmtNew, "s3_repo_new"); - Assertions.assertEquals(4, repositoryNew.getRemoteFileSystem().getProperties().size()); + Assertions.assertEquals(3, repositoryNew.getRemoteFileSystem().getProperties().size()); } private static Repository getRepository(CreateRepositoryStmt analyzedStmt, String name) throws DdlException { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java index 21595cc4cce588..d9c8dcaa3e1a00 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/CreateRepositoryCommandTest.java @@ -72,7 +72,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { Assertions.assertTrue(logicalPlan instanceof CreateRepositoryCommand); Assertions.assertEquals(((CreateRepositoryCommand) logicalPlan).getProperties().size(), 4); Repository repository = getRepository((CreateRepositoryCommand) logicalPlan, "s3_repo_command"); - Assertions.assertEquals(5, repository.getRemoteFileSystem().getProperties().size()); + Assertions.assertEquals(4, repository.getRemoteFileSystem().getProperties().size()); String s3RepoNew = "CREATE REPOSITORY `s3_repo_new_command`\n" + "WITH S3\n" @@ -88,7 +88,7 @@ public void testS3RepositoryPropertiesConverter() throws Exception { Assertions.assertTrue(logicalPlan1 instanceof CreateRepositoryCommand); Assertions.assertEquals(((CreateRepositoryCommand) logicalPlan1).getProperties().size(), 3); Repository repositoryNew = getRepository((CreateRepositoryCommand) logicalPlan1, "s3_repo_new_command"); - Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(), 4); + Assertions.assertEquals(3, repositoryNew.getRemoteFileSystem().getProperties().size()); } @Disabled("not support") From f0e1a612893befc72c0ff07ad08fd15deabbc9e5 Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Sat, 7 Jun 2025 23:24:17 +0800 Subject: [PATCH 5/7] test --- .../org/apache/doris/fs/PersistentFileSystem.java | 4 +--- .../apache/doris/fs/remote/AzureFileSystem.java | 14 +++++++++++--- .../apache/doris/fs/remote/BrokerFileSystem.java | 10 +++++++++- .../org/apache/doris/fs/remote/S3FileSystem.java | 14 ++++++-------- .../apache/doris/fs/remote/dfs/DFSFileSystem.java | 12 +++++++----- .../doris/fs/remote/RemoteFileSystemTest.java | 6 ++++++ 6 files changed, 40 insertions(+), 20 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java index 9ffe426306da31..ec8f1a8b338fa8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/PersistentFileSystem.java @@ -23,7 +23,6 @@ import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; -import lombok.Getter; import java.io.DataInput; import java.io.IOException; @@ -40,8 +39,7 @@ public abstract class PersistentFileSystem implements FileSystem { public String name; public StorageBackend.StorageType type; - @Getter - protected StorageProperties storageProperties; + public abstract StorageProperties getStorageProperties(); public PersistentFileSystem(String name, StorageBackend.StorageType type) { this.name = name; diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java index a30e2f09b367a4..88e09a67b33cf8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java @@ -21,6 +21,7 @@ import org.apache.doris.backup.Status; import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.storage.AzureProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.obj.AzureObjStorage; import org.apache.hadoop.fs.FileSystem; @@ -29,10 +30,12 @@ public class AzureFileSystem extends ObjFileSystem { + private final AzureProperties azureProperties; + public AzureFileSystem(AzureProperties azureProperties) { - super(StorageType.AZURE.name(), StorageType.S3, new AzureObjStorage(azureProperties)); - this.storageProperties = azureProperties; - this.properties.putAll(storageProperties.getOrigProps()); + super(StorageType.AZURE.name(), StorageType.AZURE, new AzureObjStorage(azureProperties)); + this.azureProperties 
= azureProperties; + this.properties.putAll(azureProperties.getOrigProps()); } @Override @@ -45,4 +48,9 @@ public Status globList(String remotePath, List result, boolean fileN AzureObjStorage azureObjStorage = (AzureObjStorage) getObjStorage(); return azureObjStorage.globList(remotePath, result, fileNameOnly); } + + @Override + public StorageProperties getStorageProperties() { + return azureProperties; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java index b70e1d37de9a61..deccdf03160316 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/BrokerFileSystem.java @@ -28,6 +28,7 @@ import org.apache.doris.common.util.BrokerUtil; import org.apache.doris.datasource.property.PropertyConverter; import org.apache.doris.datasource.property.storage.BrokerProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.operations.BrokerFileOperations; import org.apache.doris.fs.operations.OpParams; import org.apache.doris.service.FrontendOptions; @@ -76,14 +77,16 @@ public class BrokerFileSystem extends RemoteFileSystem { private static final Logger LOG = LogManager.getLogger(BrokerFileSystem.class); private final BrokerFileOperations operations; + private final BrokerProperties brokerProperties; + //todo The method parameter should use the interface type StorageProperties instead of a specific implementation. 
public BrokerFileSystem(String name, Map properties) { super(name, StorageBackend.StorageType.BROKER); properties.putAll(PropertyConverter.convertToHadoopFSProperties(properties)); this.properties = properties; this.operations = new BrokerFileOperations(name, properties); // support broker properties in future - this.storageProperties = new BrokerProperties(properties); + this.brokerProperties = new BrokerProperties(properties); } public Pair getBroker() { @@ -701,4 +704,9 @@ public Status globList(String remotePath, List result, boolean fileN public Status makeDir(String remotePath) { return new Status(Status.ErrCode.COMMON_ERROR, "mkdir is not implemented."); } + + @Override + public StorageProperties getStorageProperties() { + return brokerProperties; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java index da168b1a162f75..edc87003d3a696 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java @@ -23,6 +23,7 @@ import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.doris.common.util.S3URI; import org.apache.doris.datasource.property.storage.AbstractS3CompatibleProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.obj.S3ObjStorage; import com.google.common.annotations.VisibleForTesting; @@ -39,7 +40,7 @@ public class S3FileSystem extends ObjFileSystem { private static final Logger LOG = LogManager.getLogger(S3FileSystem.class); private HadoopAuthenticator authenticator = null; - private AbstractS3CompatibleProperties s3Properties; + private final AbstractS3CompatibleProperties s3Properties; public S3FileSystem(AbstractS3CompatibleProperties s3Properties) { @@ -47,19 +48,16 @@ public S3FileSystem(AbstractS3CompatibleProperties s3Properties) { 
super(StorageBackend.StorageType.S3.name(), StorageBackend.StorageType.S3, new S3ObjStorage(s3Properties)); this.s3Properties = s3Properties; - this.storageProperties = s3Properties; initFsProperties(); - } - @VisibleForTesting - public S3FileSystem(S3ObjStorage storage) { - super(StorageBackend.StorageType.S3.name(), StorageBackend.StorageType.S3, storage); - initFsProperties(); + @Override + public StorageProperties getStorageProperties() { + return s3Properties; } private void initFsProperties() { - this.properties.putAll(storageProperties.getOrigProps()); + this.properties.putAll(s3Properties.getOrigProps()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java index 5d9869b817e58c..b6e3f119243f28 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/dfs/DFSFileSystem.java @@ -23,6 +23,7 @@ import org.apache.doris.common.security.authentication.HadoopAuthenticator; import org.apache.doris.common.util.URI; import org.apache.doris.datasource.property.storage.HdfsCompatibleProperties; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.doris.fs.operations.HDFSFileOperations; import org.apache.doris.fs.operations.HDFSOpParams; import org.apache.doris.fs.operations.OpParams; @@ -62,15 +63,19 @@ public class DFSFileSystem extends RemoteFileSystem { private static final Logger LOG = LogManager.getLogger(DFSFileSystem.class); private HDFSFileOperations operations = null; private HadoopAuthenticator authenticator = null; - private HdfsCompatibleProperties hdfsProperties; + private final HdfsCompatibleProperties hdfsProperties; public DFSFileSystem(HdfsCompatibleProperties hdfsProperties) { super(StorageBackend.StorageType.HDFS.name(), StorageBackend.StorageType.HDFS); this.properties.putAll(hdfsProperties.getOrigProps()); - 
this.storageProperties = hdfsProperties; this.hdfsProperties = hdfsProperties; } + @Override + public StorageProperties getStorageProperties() { + return hdfsProperties; + } + public DFSFileSystem(HdfsCompatibleProperties hdfsProperties, StorageBackend.StorageType storageType) { super(storageType.name(), storageType); this.properties.putAll(hdfsProperties.getOrigProps()); @@ -90,9 +95,6 @@ public FileSystem nativeFileSystem(String remotePath) throws UserException { } if (dfsFileSystem == null) { Configuration conf = hdfsProperties.getHadoopConfiguration(); - // TODO: Temporarily disable the HDFS file system cache to prevent instances from being closed by - // each other in V1. This line can be removed once V1 and V2 are unified. - conf.set("fs.hdfs.impl.disable.cache", "true"); authenticator = HadoopAuthenticator.getHadoopAuthenticator(conf); try { dfsFileSystem = authenticator.doAs(() -> { diff --git a/fe/fe-core/src/test/java/org/apache/doris/fs/remote/RemoteFileSystemTest.java b/fe/fe-core/src/test/java/org/apache/doris/fs/remote/RemoteFileSystemTest.java index 9ad929edb224d8..632f77ec4f5308 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/fs/remote/RemoteFileSystemTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/fs/remote/RemoteFileSystemTest.java @@ -19,6 +19,7 @@ import org.apache.doris.analysis.StorageBackend; import org.apache.doris.backup.Status; +import org.apache.doris.datasource.property.storage.StorageProperties; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -45,6 +46,11 @@ public class RemoteFileSystemTest { @BeforeEach void setUp() { remoteFileSystem = Mockito.spy(new RemoteFileSystem("test", StorageBackend.StorageType.HDFS) { + @Override + public StorageProperties getStorageProperties() { + return null; + } + @Override public Status exists(String remotePath) { return null; From 0f9ade0f8f8ce73f136498e362082dce5bc4102c Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Sat, 7 Jun 2025 23:32:12 +0800 
Subject: [PATCH 6/7] test --- .../java/org/apache/doris/fs/remote/RemoteFileSystem.java | 7 +++++++ .../main/java/org/apache/doris/fs/remote/S3FileSystem.java | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java index a9ea0f4e1b34a2..26b4632cc01568 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFileSystem.java @@ -48,6 +48,13 @@ public RemoteFileSystem(String name, StorageBackend.StorageType type) { super(name, type); } + /* + * todo In the previous design, RemoteFileSystem was modeled as an HDFS-style + * file system. However, this is no longer accurate — services like Azure + * and the refactored S3 do not follow the same semantics. We need to rethink + * the modeling of RemoteFileSystem. At the very least, this method should be + * deprecated and removed in the future, as keeping it may lead to functional inconsistencies. 
+ */ protected org.apache.hadoop.fs.FileSystem nativeFileSystem(String remotePath) throws UserException { throw new UserException("Not support to getFileSystem."); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java index edc87003d3a696..7d36f7b0833bd1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java @@ -66,7 +66,6 @@ protected FileSystem nativeFileSystem(String remotePath) throws UserException { throw new UserException("S3 does not support native file system"); } - // broker file pattern glob is too complex, so we use hadoop directly @Override public Status globList(String remotePath, List result, boolean fileNameOnly) { S3ObjStorage objStorage = (S3ObjStorage) this.objStorage; From 5442be39eda34b01f4a8e4d341dc734de80d9807 Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Sun, 8 Jun 2025 09:02:06 +0800 Subject: [PATCH 7/7] :( --- .../suites/cold_heat_separation/policy/drop.groovy | 6 +++--- .../cold_heat_separation/policy/drop_hdfs_reource.groovy | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/regression-test/suites/cold_heat_separation/policy/drop.groovy b/regression-test/suites/cold_heat_separation/policy/drop.groovy index 6879e4891afc3c..5e0f385e0978d2 100644 --- a/regression-test/suites/cold_heat_separation/policy/drop.groovy +++ b/regression-test/suites/cold_heat_separation/policy/drop.groovy @@ -84,7 +84,7 @@ suite("drop_policy") { CREATE STORAGE POLICY ${use_policy} PROPERTIES( "storage_resource" = "${resource_table_use}", - "cooldown_datetime" = "2025-06-08 00:00:00" + "cooldown_datetime" = "2035-06-08 00:00:00" ); """ assertEquals(storage_exist.call(use_policy), true) @@ -105,7 +105,7 @@ suite("drop_policy") { CREATE STORAGE POLICY IF NOT EXISTS drop_policy_test_has_table_binded PROPERTIES( "storage_resource" = 
"${resource_table_use}", - "cooldown_datetime" = "2025-06-08 00:00:00" + "cooldown_datetime" = "2035-06-08 00:00:00" ); """ assertEquals(storage_exist.call("drop_policy_test_has_table_binded"), true) @@ -114,7 +114,7 @@ suite("drop_policy") { CREATE STORAGE POLICY IF NOT EXISTS drop_policy_test_has_table_bind_1 PROPERTIES( "storage_resource" = "${resource_table_use}", - "cooldown_datetime" = "2025-06-08 00:00:00" + "cooldown_datetime" = "2035-06-08 00:00:00" ); """ assertEquals(storage_exist.call("drop_policy_test_has_table_bind_1"), true) diff --git a/regression-test/suites/cold_heat_separation/policy/drop_hdfs_reource.groovy b/regression-test/suites/cold_heat_separation/policy/drop_hdfs_reource.groovy index 39bbaf09e7a439..3f6b236147f081 100644 --- a/regression-test/suites/cold_heat_separation/policy/drop_hdfs_reource.groovy +++ b/regression-test/suites/cold_heat_separation/policy/drop_hdfs_reource.groovy @@ -80,7 +80,7 @@ suite("drop_hdfs_policy") { CREATE STORAGE POLICY ${use_policy} PROPERTIES( "storage_resource" = "${resource_table_use}", - "cooldown_datetime" = "2025-06-08 00:00:00" + "cooldown_datetime" = "2035-06-08 00:00:00" ); """ assertEquals(storage_exist.call(use_policy), true) @@ -101,7 +101,7 @@ suite("drop_hdfs_policy") { CREATE STORAGE POLICY IF NOT EXISTS drop_policy_test_has_table_binded_hdfs PROPERTIES( "storage_resource" = "${resource_table_use}", - "cooldown_datetime" = "2025-06-08 00:00:00" + "cooldown_datetime" = "2035-06-08 00:00:00" ); """ assertEquals(storage_exist.call("drop_policy_test_has_table_binded_hdfs"), true)