Skip to content

Commit 83793da

Browse files
committed
Merge branch 'main' into timesync-hashmap
2 parents 3630d0e + 21a286f commit 83793da

File tree

9 files changed

+209
-129
lines changed

9 files changed

+209
-129
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ simplelog = "0.12.2"
2222
csv = "1.3.1"
2323
chrono = "0.4.38"
2424
criterion = "0.5.1"
25+
anyhow = "1.0.93"
2526

2627
[[bench]]
2728
name = "high_sierra_benchmark"

deny.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ allow = [
105105
"BSL-1.0",
106106
"Unlicense",
107107
"Unicode-DFS-2016",
108+
"Unicode-3.0",
108109
]
109110
# List of explicitly disallowed licenses
110111
# See https://spdx.org/licenses/ for list of possible licenses

examples/unifiedlog_iterator/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ simplelog = "0.12.2"
1010
csv = "1.3.0"
1111
chrono = "0.4.38"
1212
log = "0.4.22"
13+
serde_json = "1.0.122"
1314
macos-unifiedlogs = {path = "../../"}
1415
clap = {version = "4.5.18", features = ["derive"]}

examples/unifiedlog_iterator/src/main.rs

Lines changed: 129 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,15 @@ struct Args {
4141
/// Path to output file. Any directories must already exist
4242
#[clap(short, long, default_value = "")]
4343
output: String,
44+
45+
/// Output format. Options: csv, jsonl. Default is autodetect.
46+
#[clap(short, long, default_value = "auto")]
47+
format: String,
48+
49+
/// Append to output file
50+
/// If false, will overwrite output file
51+
#[clap(short, long, default_value = "false")]
52+
append: bool,
4453
}
4554

4655
fn main() {
@@ -50,19 +59,27 @@ fn main() {
5059
.expect("Failed to initialize simple logger");
5160

5261
let args = Args::parse();
53-
let mut writer = construct_writer(&args.output).unwrap();
54-
// Create headers for CSV file
55-
output_header(&mut writer).unwrap();
62+
let output_format = if args.format.is_empty() || args.format == "auto" {
63+
std::path::Path::new(&args.output)
64+
.extension()
65+
.and_then(std::ffi::OsStr::to_str)
66+
.unwrap_or("csv")
67+
.to_string()
68+
} else {
69+
args.format.clone()
70+
};
5671

57-
if args.input != "" {
72+
let mut writer = OutputWriter::new(&args.output, &output_format, args.append).unwrap();
73+
74+
if !args.input.is_empty() {
5875
parse_log_archive(&args.input, &mut writer);
5976
} else if args.live != "false" {
6077
parse_live_system(&mut writer);
6178
}
6279
}
6380

6481
// Parse a provided directory path. Currently expects the path to follow the macOS `log collect` archive structure
65-
fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
82+
fn parse_log_archive(path: &str, writer: &mut OutputWriter) {
6683
let mut archive_path = PathBuf::from(path);
6784

6885
// Parse all UUID files which contain strings and other metadata
@@ -93,7 +110,7 @@ fn parse_log_archive(path: &str, writer: &mut Writer<Box<dyn Write>>) {
93110
}
94111

95112
// Parse a live macOS system
96-
fn parse_live_system(writer: &mut Writer<Box<dyn Write>>) {
113+
fn parse_live_system(writer: &mut OutputWriter) {
97114
let strings = collect_strings_system().unwrap();
98115
let shared_strings = collect_shared_strings_system().unwrap();
99116
let timesync_data = collect_timesync_system().unwrap();
@@ -116,7 +133,7 @@ fn parse_trace_file(
116133
shared_strings_results: &[SharedCacheStrings],
117134
timesync_data: &HashMap<String, TimesyncBoot>,
118135
path: &str,
119-
writer: &mut Writer<Box<dyn Write>>,
136+
writer: &mut OutputWriter,
120137
) {
121138
// We need to persist the Oversize log entries (they contain large strings that don't fit in normal log entries)
122139
// Some log entries have Oversize strings located in different tracev3 files.
@@ -302,7 +319,7 @@ fn iterate_chunks(
302319
strings_data: &[UUIDText],
303320
shared_strings: &[SharedCacheStrings],
304321
timesync_data: &HashMap<String, TimesyncBoot>,
305-
writer: &mut Writer<Box<dyn Write>>,
322+
writer: &mut OutputWriter,
306323
oversize_strings: &mut UnifiedLogData,
307324
) -> usize {
308325
let log_bytes = fs::read(path).unwrap();
@@ -341,71 +358,116 @@ fn iterate_chunks(
341358
count
342359
}
343360

344-
fn construct_writer(output_path: &str) -> Result<Writer<Box<dyn Write>>, Box<dyn Error>> {
345-
let writer = if output_path != "" {
346-
Box::new(
347-
OpenOptions::new()
348-
.append(true)
349-
.create(true)
350-
.open(output_path)?,
351-
) as Box<dyn Write>
352-
} else {
353-
Box::new(io::stdout()) as Box<dyn Write>
354-
};
355-
Ok(Writer::from_writer(writer))
361+
pub struct OutputWriter {
362+
writer: OutputWriterEnum,
356363
}
357364

358-
// Create csv file and create headers
359-
fn output_header(writer: &mut Writer<Box<dyn Write>>) -> Result<(), Box<dyn Error>> {
360-
writer.write_record(&[
361-
"Timestamp",
362-
"Event Type",
363-
"Log Type",
364-
"Subsystem",
365-
"Thread ID",
366-
"PID",
367-
"EUID",
368-
"Library",
369-
"Library UUID",
370-
"Activity ID",
371-
"Category",
372-
"Process",
373-
"Process UUID",
374-
"Message",
375-
"Raw Message",
376-
"Boot UUID",
377-
"System Timezone Name",
378-
])?;
379-
writer.flush()?;
380-
Ok(())
365+
enum OutputWriterEnum {
366+
Csv(Box<Writer<Box<dyn Write>>>),
367+
Json(Box<dyn Write>),
368+
}
369+
370+
impl OutputWriter {
371+
pub fn new(
372+
output_path: &str,
373+
output_format: &str,
374+
append: bool,
375+
) -> Result<Self, Box<dyn Error>> {
376+
let writer: Box<dyn Write> = if !output_path.is_empty() {
377+
Box::new(
378+
OpenOptions::new()
379+
.write(true)
380+
.create(true)
381+
.truncate(!append)
382+
.append(append)
383+
.open(output_path)?,
384+
)
385+
} else {
386+
Box::new(io::stdout())
387+
};
388+
389+
let writer_enum = match output_format {
390+
"csv" => {
391+
let mut csv_writer = Writer::from_writer(writer);
392+
// Write CSV headers
393+
csv_writer.write_record([
394+
"Timestamp",
395+
"Event Type",
396+
"Log Type",
397+
"Subsystem",
398+
"Thread ID",
399+
"PID",
400+
"EUID",
401+
"Library",
402+
"Library UUID",
403+
"Activity ID",
404+
"Category",
405+
"Process",
406+
"Process UUID",
407+
"Message",
408+
"Raw Message",
409+
"Boot UUID",
410+
"System Timezone Name",
411+
])?;
412+
csv_writer.flush()?;
413+
OutputWriterEnum::Csv(Box::new(csv_writer))
414+
}
415+
"jsonl" => OutputWriterEnum::Json(writer),
416+
_ => {
417+
eprintln!("Unsupported output format: {}", output_format);
418+
std::process::exit(1);
419+
}
420+
};
421+
422+
Ok(OutputWriter {
423+
writer: writer_enum,
424+
})
425+
}
426+
427+
pub fn write_record(&mut self, record: &LogData) -> Result<(), Box<dyn Error>> {
428+
match &mut self.writer {
429+
OutputWriterEnum::Csv(csv_writer) => {
430+
let date_time = Utc.timestamp_nanos(record.time as i64);
431+
csv_writer.write_record(&[
432+
date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
433+
record.event_type.to_owned(),
434+
record.log_type.to_owned(),
435+
record.subsystem.to_owned(),
436+
record.thread_id.to_string(),
437+
record.pid.to_string(),
438+
record.euid.to_string(),
439+
record.library.to_owned(),
440+
record.library_uuid.to_owned(),
441+
record.activity_id.to_string(),
442+
record.category.to_owned(),
443+
record.process.to_owned(),
444+
record.process_uuid.to_owned(),
445+
record.message.to_owned(),
446+
record.raw_message.to_owned(),
447+
record.boot_uuid.to_owned(),
448+
record.timezone_name.to_owned(),
449+
])?;
450+
}
451+
OutputWriterEnum::Json(json_writer) => {
452+
writeln!(json_writer, "{}", serde_json::to_string(record).unwrap())?;
453+
}
454+
}
455+
Ok(())
456+
}
457+
458+
pub fn flush(&mut self) -> Result<(), Box<dyn Error>> {
459+
match &mut self.writer {
460+
OutputWriterEnum::Csv(csv_writer) => csv_writer.flush()?,
461+
OutputWriterEnum::Json(json_writer) => json_writer.flush()?,
462+
}
463+
Ok(())
464+
}
381465
}
382466

383467
// Write each parsed log entry through the output writer (CSV or JSONL), then flush
384-
fn output(
385-
results: &Vec<LogData>,
386-
writer: &mut Writer<Box<dyn Write>>,
387-
) -> Result<(), Box<dyn Error>> {
468+
fn output(results: &Vec<LogData>, writer: &mut OutputWriter) -> Result<(), Box<dyn Error>> {
388469
for data in results {
389-
let date_time = Utc.timestamp_nanos(data.time as i64);
390-
writer.write_record(&[
391-
date_time.to_rfc3339_opts(SecondsFormat::Millis, true),
392-
data.event_type.to_owned(),
393-
data.log_type.to_owned(),
394-
data.subsystem.to_owned(),
395-
data.thread_id.to_string(),
396-
data.pid.to_string(),
397-
data.euid.to_string(),
398-
data.library.to_owned(),
399-
data.library_uuid.to_owned(),
400-
data.activity_id.to_string(),
401-
data.category.to_owned(),
402-
data.process.to_owned(),
403-
data.process_uuid.to_owned(),
404-
data.message.to_owned(),
405-
data.raw_message.to_owned(),
406-
data.boot_uuid.to_owned(),
407-
data.timezone_name.to_owned(),
408-
])?;
470+
writer.write_record(data)?;
409471
}
410472
writer.flush()?;
411473
Ok(())

rustfmt.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
newline_style = "Unix"
2+
tab_spaces = 4
3+
max_width = 100
4+
chain_width = 60
5+
use_small_heuristics = "Default"

src/dsc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ mod tests {
317317
#[should_panic(expected = "Incomplete(Unknown)")]
318318
fn test_bad_file() {
319319
let mut test_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
320-
test_path.push("tests/test_data/Bad Data/DSC/badfile");
320+
test_path.push("tests/test_data/Bad Data/DSC/Badfile");
321321

322322
let buffer = fs::read(test_path).unwrap();
323323
let (_, _) = SharedCacheStrings::parse_dsc(&buffer).unwrap();

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
clippy::checked_conversions,
3232
clippy::unnecessary_cast
3333
)]
34+
3435
mod catalog;
3536
mod chunks;
3637
mod chunkset;

0 commit comments

Comments
 (0)