Skip to content

Commit 7bb4022

Browse files
committed
* log-exporter: export action and event in parquet format
Signed-off-by: neo <[email protected]>
1 parent e35fad8 commit 7bb4022

15 files changed

+312
-79
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
* sse: send ErrorResponse to client via "event: error" on exception
66
* sse: log clientIP on sse:close action
7+
* log-exporter: export action and event in parquet format
78

89
### 9.1.6 (2/10/2025 - 2/25/2025)
910

core-ng/src/main/java/core/framework/log/message/EventMessage.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ public class EventMessage {
2525
public String errorCode;
2626
@Property(name = "error_message")
2727
public String errorMessage;
28+
@Property(name = "elapsed")
29+
public Long elapsed;
2830
@Property(name = "context")
2931
public Map<String, String> context;
3032
@Property(name = "stats")
3133
public Map<String, Double> stats;
3234
@Property(name = "info")
3335
public Map<String, String> info;
34-
@Property(name = "elapsed")
35-
public Long elapsed;
3636
}

ext/build.gradle.kts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,17 @@ project("log-exporter") {
2424
apply(plugin = "app")
2525
dependencies {
2626
implementation(project(":core-ng"))
27+
28+
// for parquet
29+
compileOnly("org.apache.hadoop:hadoop-annotations:3.4.1")
30+
implementation("org.apache.parquet:parquet-avro:1.15.0")
31+
implementation("org.apache.avro:avro:1.12.0")
32+
implementation("org.apache.hadoop:hadoop-common:3.4.1@jar")
33+
runtimeOnly("commons-collections:commons-collections:3.2.2@jar")
34+
runtimeOnly("com.fasterxml.woodstox:woodstox-core:5.4.0@jar")
35+
runtimeOnly("org.codehaus.woodstox:stax2-api:4.2.1@jar")
36+
runtimeOnly("org.apache.hadoop.thirdparty:hadoop-shaded-guava:1.2.0@jar")
37+
2738
testImplementation(project(":core-ng-test"))
2839
}
2940
}

ext/log-exporter/src/main/java/core/log/LogExporterApp.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,17 @@
55
import core.framework.log.message.EventMessage;
66
import core.framework.log.message.LogTopics;
77
import core.framework.module.App;
8+
import core.log.domain.ActionLogSchema;
9+
import core.log.domain.EventSchema;
810
import core.log.job.ProcessLogJob;
911
import core.log.kafka.ActionLogMessageHandler;
1012
import core.log.kafka.EventMessageHandler;
1113
import core.log.service.ArchiveService;
1214
import core.log.service.UploadService;
1315
import core.log.web.UploadController;
1416
import core.log.web.UploadRequest;
17+
import org.apache.avro.data.TimeConversions;
18+
import org.apache.avro.specific.SpecificData;
1519

1620
import java.time.Duration;
1721
import java.time.LocalTime;
@@ -31,6 +35,11 @@ protected void initialize() {
3135
kafka().minPoll(1024 * 1024, Duration.ofMillis(5000)); // try to get at least 1M message, and can wait longer
3236
kafka().maxPoll(3000, 3 * 1024 * 1024); // get 3M message at max
3337

38+
SpecificData specificData = SpecificData.get();
39+
specificData.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion());
40+
bind(ActionLogSchema.class);
41+
bind(EventSchema.class);
42+
3443
bind(new UploadService(requiredProperty("app.log.bucket")));
3544
bind(ArchiveService.class);
3645

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package core.log.domain;

import core.framework.log.message.ActionLogMessage;
import core.framework.log.message.PerformanceStatMessage;
import core.framework.util.Maps;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;

import java.util.Map;

/**
 * Avro schema for {@link ActionLogMessage} and the mapping from message to avro record.
 * <p>
 * The "date" field uses the timestamp-micros logical type; serialization relies on the
 * TimestampMicrosConversion registered on SpecificData in LogExporterApp.
 */
public class ActionLogSchema {
    public final Schema schema;

    public ActionLogSchema() {
        schema = SchemaBuilder.record("action")
            .fields()
            .requiredString("id")
            .name("date").type().optional().type(LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)))
            .requiredString("app")
            .requiredString("host")
            .requiredString("result")
            .requiredString("action")
            .name("correlation_ids").type().optional().array().items().stringType()
            .name("client").type().optional().array().items().stringType()
            .name("ref_ids").type().optional().array().items().stringType()
            .optionalString("error_code")
            .optionalString("error_message")
            .requiredLong("elapsed")
            .name("context").type().optional().type(SchemaBuilder.map().values().stringType())
            .name("stats").type().optional().map().values().doubleType()
            .name("perf_stats").type().optional().map().values().longType()
            .endRecord();
    }

    /**
     * Converts one action log message into an avro record matching {@link #schema}.
     *
     * @param message source message consumed from kafka
     * @return record ready to be appended to an avro data file
     */
    public GenericData.Record record(ActionLogMessage message) {
        var record = new GenericData.Record(schema);
        record.put("id", message.id);
        record.put("date", message.date);
        record.put("app", message.app);
        record.put("host", message.host);
        record.put("result", message.result);
        record.put("action", message.action);
        record.put("correlation_ids", message.correlationIds);
        record.put("client", message.clients);
        record.put("ref_ids", message.refIds);
        record.put("error_code", message.errorCode);
        record.put("error_message", message.errorMessage);
        record.put("elapsed", message.elapsed);
        record.put("context", message.context);
        record.put("stats", message.stats);
        // BUG FIX: previously the flattened map was built but message.performanceStats (map of
        // PerformanceStatMessage) was written into a field declared as map<long>; write the
        // flattened scalar map instead
        record.put("perf_stats", flatten(message.performanceStats));
        return record;
    }

    // flattens each PerformanceStatMessage into up to 4 scalar entries (count / total_elapsed /
    // read_entries / write_entries) since the "perf_stats" field is declared map<long>
    private Map<String, Long> flatten(Map<String, PerformanceStatMessage> performanceStats) {
        if (performanceStats == null) return null;    // "perf_stats" is optional, tolerate absent stats
        Map<String, Long> perfStats = Maps.newHashMapWithExpectedSize(performanceStats.size() * 4);
        for (Map.Entry<String, PerformanceStatMessage> entry : performanceStats.entrySet()) {
            String key = entry.getKey();
            PerformanceStatMessage stat = entry.getValue();
            perfStats.put(key + ".count", (long) stat.count);
            perfStats.put(key + ".total_elapsed", stat.totalElapsed);
            if (stat.readEntries != null) perfStats.put(key + ".read_entries", (long) stat.readEntries);
            if (stat.writeEntries != null) perfStats.put(key + ".write_entries", (long) stat.writeEntries);
        }
        return perfStats;
    }
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package core.log.domain;
2+
3+
import core.framework.log.message.EventMessage;
4+
import org.apache.avro.LogicalTypes;
5+
import org.apache.avro.Schema;
6+
import org.apache.avro.SchemaBuilder;
7+
import org.apache.avro.generic.GenericData;
8+
9+
public class EventSchema {
10+
public final Schema schema;
11+
12+
public EventSchema() {
13+
schema = SchemaBuilder.record("event")
14+
.fields()
15+
.requiredString("id")
16+
.name("date").type().optional().type(LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)))
17+
.requiredString("app")
18+
.name("received_time").type().optional().type(LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)))
19+
.requiredString("result")
20+
.requiredString("action")
21+
.optionalString("error_code")
22+
.optionalString("error_message")
23+
.requiredLong("elapsed")
24+
.name("context").type().optional().type(SchemaBuilder.map().values().stringType())
25+
.name("stats").type().optional().map().values().doubleType()
26+
.name("info").type().optional().type(SchemaBuilder.map().values().stringType())
27+
.endRecord();
28+
}
29+
30+
public GenericData.Record record(EventMessage message) {
31+
var record = new GenericData.Record(schema);
32+
record.put("id", message.id);
33+
record.put("date", message.date);
34+
record.put("app", message.app);
35+
record.put("received_time", message.receivedTime);
36+
record.put("result", message.result);
37+
record.put("action", message.action);
38+
record.put("error_code", message.errorCode);
39+
record.put("error_message", message.errorMessage);
40+
record.put("elapsed", message.elapsed);
41+
record.put("context", message.context);
42+
record.put("stats", message.stats);
43+
record.put("info", message.info);
44+
return record;
45+
}
46+
}

ext/log-exporter/src/main/java/core/log/job/ProcessLogJob.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import core.framework.scheduler.JobContext;
66
import core.log.service.ArchiveService;
77

8+
import java.io.IOException;
89
import java.time.LocalDate;
910

1011
/**
@@ -15,7 +16,7 @@ public class ProcessLogJob implements Job {
1516
ArchiveService archiveService;
1617

1718
@Override
18-
public void execute(JobContext context) {
19+
public void execute(JobContext context) throws IOException {
1920
LocalDate today = context.scheduledTime.toLocalDate();
2021
archiveService.cleanupArchive(today.minusDays(5)); // cleanup first, to free disk space when possible
2122
archiveService.uploadArchive(today.minusDays(1));
Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,46 @@
11
package core.log.kafka;

import core.framework.inject.Inject;
import core.framework.kafka.BulkMessageHandler;
import core.framework.kafka.Message;
import core.framework.log.message.ActionLogMessage;
import core.log.domain.ActionLogSchema;
import core.log.service.ArchiveService;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.specific.SpecificDatumWriter;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.util.List;

/**
 * @author neo
 */
public class ActionLogMessageHandler implements BulkMessageHandler<ActionLogMessage> {
    @Inject
    ArchiveService archiveService;
    @Inject
    ActionLogSchema schema;

    /**
     * Appends the batch of action log messages to today's local avro data file.
     *
     * @param messages batch of kafka messages
     * @throws IOException if the data file cannot be created or written
     */
    @Override
    public void handle(List<Message<ActionLogMessage>> messages) throws IOException {
        Path path = archiveService.localActionLogFilePath(LocalDate.now());
        archiveService.createParentDir(path);

        // SpecificDatumWriter is used (not GenericDatumWriter) so the logical-type conversions
        // registered on SpecificData in LogExporterApp apply during serialization
        try (var dataFileWriter = new DataFileWriter<GenericData.Record>(new SpecificDatumWriter<>(schema.schema))) {
            // reuse the existing daily file across batches, create it on first batch of the day
            if (Files.exists(path)) {
                dataFileWriter.appendTo(path.toFile());
            } else {
                dataFileWriter.create(schema.schema, path.toFile());
            }
            for (Message<ActionLogMessage> message : messages) {
                dataFileWriter.append(schema.record(message.value));
            }
        }
    }
}

ext/log-exporter/src/main/java/core/log/kafka/EventMessageHandler.java

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,45 @@
11
package core.log.kafka;

import core.framework.inject.Inject;
import core.framework.kafka.BulkMessageHandler;
import core.framework.kafka.Message;
import core.framework.log.message.EventMessage;
import core.log.domain.EventSchema;
import core.log.service.ArchiveService;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.specific.SpecificDatumWriter;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.util.List;

/**
 * @author neo
 */
public class EventMessageHandler implements BulkMessageHandler<EventMessage> {
    @Inject
    ArchiveService archiveService;
    @Inject
    EventSchema schema;

    /**
     * Appends the batch of event messages to today's local avro data file.
     *
     * @param messages batch of kafka messages
     * @throws IOException if the data file cannot be created or written
     */
    @Override
    public void handle(List<Message<EventMessage>> messages) throws IOException {
        Path path = archiveService.localEventFilePath(LocalDate.now());
        archiveService.createParentDir(path);

        // SpecificDatumWriter is used (not GenericDatumWriter) so the logical-type conversions
        // registered on SpecificData in LogExporterApp apply during serialization
        try (var dataFileWriter = new DataFileWriter<GenericData.Record>(new SpecificDatumWriter<>(schema.schema))) {
            // reuse the existing daily file across batches, create it on first batch of the day
            if (Files.exists(path)) {
                dataFileWriter.appendTo(path.toFile());
            } else {
                dataFileWriter.create(schema.schema, path.toFile());
            }
            for (Message<EventMessage> message : messages) {
                dataFileWriter.append(schema.record(message.value));
            }
        }
    }
}

0 commit comments

Comments
 (0)