-
Notifications
You must be signed in to change notification settings - Fork 4.5k
🎉 Migrate config persistence to database #4670
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
6ec78f2
947d03a
e83c373
dc86c62
648ce17
e72843c
1067aa4
607bf07
33ed1cd
19a8f2e
3a9db90
6fb38bf
576f28d
8b85a03
d62a72b
d7e4815
91f2f77
dc14826
9be88a3
991a374
7b845ce
c2f9275
865e11b
d88e663
ee9b3ea
1d7741e
c29bb7e
ad9c982
0c68bc0
86ee51e
d6b95b2
03ac64a
7537864
024253b
ae638ef
0d22400
c0ce70d
d76f58f
3d99a73
d99cda7
449cb30
d37b9fc
3461382
fb0638e
10cabfa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
- workspaceId: 5ae6b09b-fdec-41af-aaf7-7d94cfc33ef6 | ||
name: default | ||
slug: default | ||
initialSetupComplete: false | ||
displaySetupWizard: true | ||
tombstone: false |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,9 @@ public class EnvConfigs implements Configs { | |
public static final String DATABASE_USER = "DATABASE_USER"; | ||
public static final String DATABASE_PASSWORD = "DATABASE_PASSWORD"; | ||
public static final String DATABASE_URL = "DATABASE_URL"; | ||
public static final String CONFIG_DATABASE_USER = "CONFIG_DATABASE_USER"; | ||
public static final String CONFIG_DATABASE_PASSWORD = "CONFIG_DATABASE_PASSWORD"; | ||
public static final String CONFIG_DATABASE_URL = "CONFIG_DATABASE_URL"; | ||
public static final String WEBAPP_URL = "WEBAPP_URL"; | ||
private static final String MINIMUM_WORKSPACE_RETENTION_DAYS = "MINIMUM_WORKSPACE_RETENTION_DAYS"; | ||
private static final String MAXIMUM_WORKSPACE_RETENTION_DAYS = "MAXIMUM_WORKSPACE_RETENTION_DAYS"; | ||
|
@@ -127,6 +130,24 @@ public String getDatabaseUrl() { | |
return getEnsureEnv(DATABASE_URL); | ||
} | ||
|
||
@Override | ||
public String getConfigDatabaseUser() { | ||
// Default to reuse the job database | ||
return getEnvOrDefault(CONFIG_DATABASE_USER, getDatabaseUser()); | ||
} | ||
|
||
@Override | ||
public String getConfigDatabasePassword() { | ||
// Default to reuse the job database | ||
return getEnvOrDefault(CONFIG_DATABASE_PASSWORD, getDatabasePassword()); | ||
} | ||
|
||
@Override | ||
public String getConfigDatabaseUrl() { | ||
// Default to reuse the job database | ||
return getEnvOrDefault(CONFIG_DATABASE_URL, getDatabaseUrl()); | ||
} | ||
|
||
@Override | ||
public String getWebappUrl() { | ||
return getEnsureEnv(WEBAPP_URL); | ||
|
@@ -255,10 +276,10 @@ private long getEnvOrDefault(String key, long defaultValue) { | |
|
||
private <T> T getEnvOrDefault(String key, T defaultValue, Function<String, T> parser) { | ||
final String value = getEnv.apply(key); | ||
if (value != null) { | ||
if (value != null && !value.isEmpty()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes! This has bitten us a few times... |
||
return parser.apply(value); | ||
} else { | ||
LOGGER.info(key + " not found, defaulting to " + defaultValue); | ||
LOGGER.info(key + " not found or empty, defaulting to " + defaultValue); | ||
return defaultValue; | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
dependencies { | ||
implementation group: 'commons-io', name: 'commons-io', version: '2.7' | ||
|
||
implementation project(':airbyte-db') | ||
implementation project(':airbyte-config:models') | ||
implementation project(":airbyte-json-validation") | ||
implementation project(':airbyte-config:init') | ||
implementation project(':airbyte-json-validation') | ||
|
||
testImplementation "org.testcontainers:postgresql:1.15.1" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* MIT License | ||
* | ||
* Copyright (c) 2020 Airbyte | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in all | ||
* copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
* SOFTWARE. | ||
*/ | ||
|
||
package io.airbyte.config.persistence; | ||
|
||
import static org.jooq.impl.DSL.field; | ||
import static org.jooq.impl.DSL.table; | ||
|
||
import java.sql.Timestamp; | ||
import org.jooq.Field; | ||
import org.jooq.JSONB; | ||
import org.jooq.Record; | ||
import org.jooq.Table; | ||
|
||
public class AirbyteConfigsTable { | ||
|
||
public static final String AIRBYTE_CONFIGS_TABLE_SCHEMA = "airbyte_configs_table.sql"; | ||
|
||
public static final Table<Record> AIRBYTE_CONFIGS = table("airbyte_configs"); | ||
public static final Field<String> CONFIG_ID = field("config_id", String.class); | ||
public static final Field<String> CONFIG_TYPE = field("config_type", String.class); | ||
public static final Field<JSONB> CONFIG_BLOB = field("config_blob", JSONB.class); | ||
public static final Field<Timestamp> CREATED_AT = field("created_at", Timestamp.class); | ||
public static final Field<Timestamp> UPDATED_AT = field("updated_at", Timestamp.class); | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
/* | ||
* MIT License | ||
* | ||
* Copyright (c) 2020 Airbyte | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in all | ||
* copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
* SOFTWARE. | ||
*/ | ||
|
||
package io.airbyte.config.persistence; | ||
|
||
import static io.airbyte.config.persistence.AirbyteConfigsTable.AIRBYTE_CONFIGS_TABLE_SCHEMA; | ||
|
||
import io.airbyte.commons.resources.MoreResources; | ||
import io.airbyte.config.Configs; | ||
import io.airbyte.db.Database; | ||
import io.airbyte.db.Databases; | ||
import java.io.IOException; | ||
import java.nio.file.Path; | ||
import java.util.function.Function; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* By default, this factory returns a database config persistence. it can still return a file system | ||
* config persistence for testing purpose. This legacy feature should be removed after the file to | ||
* database migration is completely done. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm thinking it would be really nice to keep a working file-based config persistence and not necessarily remove it permanently... If this file config persistence reads/writes from a docker volume that is mounted from a local folder of the filesystem (like what the WDYT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious, what benefits do you see from working with a file-based config persistence vs just using import/export capabilities? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess what Chris means is that if we keep the file config persistence code, users can still manipulate the configs using the code? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For import/export capabilities, we'd have what is currently exposed as:
In the future, I am guessing we could have something like these?
Yes. For dev/test purposes, you can more easily edit/test your airbyte configurations locally by manually/programmatically editing the configs in parallel without having to restart airbyte all the time to re-init the database (re-deploy your changes)? Plus the code is almost already written for that, so it could be worth keeping around |
||
*/ | ||
public class ConfigPersistenceFactory { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(ConfigPersistenceFactory.class); | ||
|
||
private final Configs configs; | ||
private final boolean setupDatabase; | ||
private final boolean useConfigDatabase; | ||
tuliren marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/** | ||
* @param setupDatabase initialize the database and load data; this is necessary because this method | ||
* has multiple callers, and we want to setup the database only once to prevent race | ||
* conditions. | ||
*/ | ||
private ConfigPersistenceFactory(Configs configs, boolean setupDatabase, boolean useConfigDatabase) { | ||
this.configs = configs; | ||
this.setupDatabase = setupDatabase; | ||
this.useConfigDatabase = useConfigDatabase; | ||
} | ||
|
||
public static Builder build(Configs configs) { | ||
tuliren marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return new Builder(configs); | ||
} | ||
|
||
public static class Builder { | ||
|
||
private final Configs configs; | ||
private boolean setupDatabase = true; | ||
tuliren marked this conversation as resolved.
Show resolved
Hide resolved
|
||
private boolean useConfigDatabase = true; | ||
|
||
private Builder(Configs configs) { | ||
this.configs = configs; | ||
} | ||
|
||
public Builder setupDatabase(boolean setupDatabase) { | ||
this.setupDatabase = setupDatabase; | ||
return this; | ||
} | ||
|
||
public Builder useConfigDatabase(boolean useConfigDatabase) { | ||
this.useConfigDatabase = useConfigDatabase; | ||
return this; | ||
} | ||
|
||
public ConfigPersistenceFactory get() { | ||
return new ConfigPersistenceFactory(configs, setupDatabase, useConfigDatabase); | ||
} | ||
|
||
} | ||
|
||
/** | ||
* Create a config persistence based on the configs. | ||
* <p/> | ||
* If config root is defined, create a database config persistence and copy the configs from the | ||
* file-based config persistence. Otherwise, seed the database from the yaml files. | ||
*/ | ||
public ConfigPersistence create() throws IOException { | ||
if (!useConfigDatabase) { | ||
Path configRoot = configs.getConfigRoot(); | ||
LOGGER.info("Use file system config persistence (root: {})", configRoot); | ||
return FileSystemConfigPersistence.createWithValidation(configRoot); | ||
} | ||
|
||
if (configs.getConfigRoot() == null) { | ||
// This branch will only be true in a future Airbyte version, in which | ||
// the config root is no longer required and everything lives in the database. | ||
return createDbPersistenceWithYamlSeed(); | ||
} | ||
|
||
return createDbPersistenceWithFileSeed(); | ||
} | ||
|
||
ConfigPersistence createDbPersistenceWithYamlSeed() throws IOException { | ||
tuliren marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ConfigPersistence seedConfigPersistence = new YamlSeedConfigPersistence(); | ||
return createDbPersistence(seedConfigPersistence); | ||
} | ||
|
||
ConfigPersistence createDbPersistenceWithFileSeed() throws IOException { | ||
tuliren marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Path configRoot = configs.getConfigRoot(); | ||
ConfigPersistence fsConfigPersistence = FileSystemConfigPersistence.createWithValidation(configRoot); | ||
return createDbPersistence(fsConfigPersistence); | ||
} | ||
|
||
ConfigPersistence createDbPersistence(ConfigPersistence seedConfigPersistence) throws IOException { | ||
LOGGER.info("Use database config persistence."); | ||
|
||
// When setupDatabase is true, it means the database will be initialized after we | ||
// connect to the database. So the database itself is considered ready as long as | ||
// the connection is alive. Otherwise, the database is expected to have full data. | ||
Function<Database, Boolean> isReady = setupDatabase | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it took me kinda a long time to reason about this. I wonder if it would be easier to understand if we just have two methods in databases There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue is that the queries we need to run to check if job database and config database are ready are different. So creating two methods in |
||
? Databases.IS_CONFIG_DATABASE_CONNECTED | ||
: Databases.IS_CONFIG_DATABASE_LOADED_WITH_DATA; | ||
|
||
Database database = Databases.createPostgresDatabaseWithRetry( | ||
configs.getConfigDatabaseUser(), | ||
configs.getConfigDatabasePassword(), | ||
configs.getConfigDatabaseUrl(), | ||
isReady); | ||
|
||
DatabaseConfigPersistence dbConfigPersistence = new DatabaseConfigPersistence(database); | ||
if (setupDatabase) { | ||
dbConfigPersistence.initialize(MoreResources.readResource(AIRBYTE_CONFIGS_TABLE_SCHEMA)); | ||
dbConfigPersistence.loadData(seedConfigPersistence); | ||
} | ||
|
||
return new ValidatingConfigPersistence(dbConfigPersistence); | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't like the increased optionality around a custom config database here. Is there a reason we want to encourage separating this?
For users that were previously on the job database for configs, we don't want to handle migrating configs to the config database or having multiple versions of configs sitting around if we can help it.
It isn't clear to me what the benefit is, and it seems to just increase operational complexity.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We do want to have the option to separate the two databases. But I don't think we want to encourage it, at least for self-deployed instances.
The benefits of separating the two databases are:
What about if I remove the comments in the
.env
file, and only mention it in the documentation? In this way, we are not encouraging people to do it.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing them from the
.env
will result in warning messages about missing environment variables. So I added them back with comments: