Skip to content

Commit 4e5b9cf

Browse files
gosusnp authored and jhammarstedt committed
Add scheduled task to clean up old files from workspace (airbytehq#16247)
* Add airbyte-cron to bumpversion
* Update airbyte-cron version to current
* Add workspace clean up job
* Add missing env var to docker-compose
* Update file deletion logging
1 parent fcc0b9a commit 4e5b9cf

File tree

5 files changed

+78
-1
lines changed

5 files changed

+78
-1
lines changed

.bumpversion.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ serialize =
1414

1515
[bumpversion:file:airbyte-container-orchestrator/Dockerfile]
1616

17+
[bumpversion:file:airbyte-cron/Dockerfile]
18+
1719
[bumpversion:file:airbyte-metrics/reporter/Dockerfile]
1820

1921
[bumpversion:file:airbyte-server/Dockerfile]

airbyte-cron/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ ARG JDK_VERSION=19-slim-bullseye
22
ARG JDK_IMAGE=openjdk:${JDK_VERSION}
33
FROM ${JDK_IMAGE} AS cron
44

5-
ARG VERSION=0.40.0-alpha
5+
ARG VERSION=0.40.3
66

77
ENV APPLICATION airbyte-cron
88
ENV VERSION ${VERSION}

airbyte-cron/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ plugins {
33
}
44

55
dependencies {
6+
implementation project(':airbyte-config:config-models')
67
implementation project(':airbyte-workers')
78

89
runtimeOnly 'io.micronaut:micronaut-http-server-netty:3.6.0'
airbyte-cron/src/main/java/io/airbyte/cron/selfhealing/WorkspaceCleaner.java

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3+
*/
4+
5+
package io.airbyte.cron.selfhealing;
6+
7+
import io.airbyte.config.Configs;
8+
import io.airbyte.config.EnvConfigs;
9+
import io.micronaut.scheduling.annotation.Scheduled;
10+
import java.io.File;
11+
import java.io.IOException;
12+
import java.nio.file.Files;
13+
import java.nio.file.Path;
14+
import java.time.LocalDateTime;
15+
import java.time.OffsetDateTime;
16+
import java.util.Date;
17+
import java.util.concurrent.atomic.AtomicInteger;
18+
import javax.inject.Singleton;
19+
import lombok.extern.slf4j.Slf4j;
20+
import org.apache.commons.io.FileUtils;
21+
import org.apache.commons.io.filefilter.AgeFileFilter;
22+
23+
@Singleton
24+
@Slf4j
25+
public class WorkspaceCleaner {
26+
27+
private final Path workspaceRoot;
28+
private final long maxAgeFilesInDays;
29+
30+
WorkspaceCleaner() {
31+
// TODO Configs should get injected through micronaut
32+
final Configs configs = new EnvConfigs();
33+
34+
this.workspaceRoot = configs.getWorkspaceRoot();
35+
// We align max file age on temporal for history consistency
36+
// It might make sense configure this independently in the future
37+
this.maxAgeFilesInDays = configs.getTemporalRetentionInDays();
38+
}
39+
40+
/*
41+
* Delete files older than maxAgeFilesInDays from the workspace
42+
*
43+
* NOTE: this is currently only intended to work for docker
44+
*/
45+
@Scheduled(fixedRate = "1d")
46+
public void deleteOldFiles() throws IOException {
47+
final Date oldestAllowed = getDateFromDaysAgo(maxAgeFilesInDays);
48+
log.info("Deleting files older than {} days ({})", maxAgeFilesInDays, oldestAllowed);
49+
50+
final AtomicInteger counter = new AtomicInteger(0);
51+
Files.walk(workspaceRoot)
52+
.map(Path::toFile)
53+
.filter(f -> new AgeFileFilter(oldestAllowed).accept(f))
54+
.forEach(file -> {
55+
log.debug("Deleting file: " + file.toString());
56+
FileUtils.deleteQuietly(file);
57+
counter.incrementAndGet();
58+
final File parentDir = file.getParentFile();
59+
if (parentDir.isDirectory() && parentDir.listFiles().length == 0) {
60+
FileUtils.deleteQuietly(parentDir);
61+
}
62+
});
63+
log.info("deleted {} files", counter.get());
64+
}
65+
66+
private static Date getDateFromDaysAgo(final long daysAgo) {
67+
return Date.from(LocalDateTime.now().minusDays(daysAgo).toInstant(OffsetDateTime.now().getOffset()));
68+
}
69+
70+
}

docker-compose.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@ services:
186186
- POSTGRES_PWD=${DATABASE_PASSWORD}
187187
- POSTGRES_SEEDS=${DATABASE_HOST}
188188
- POSTGRES_USER=${DATABASE_USER}
189+
- TEMPORAL_HISTORY_RETENTION_IN_DAYS=${TEMPORAL_HISTORY_RETENTION_IN_DAYS}
190+
- WORKSPACE_ROOT=${WORKSPACE_ROOT}
191+
volumes:
192+
- workspace:${WORKSPACE_ROOT}
189193
volumes:
190194
workspace:
191195
name: ${WORKSPACE_DOCKER_MOUNT}

0 commit comments

Comments
 (0)