Skip to content

Commit 244006c

Browse files
Pipeline (#17)
1 parent 23fb4cc commit 244006c

File tree

3 files changed

+66
-61
lines changed

3 files changed

+66
-61
lines changed

README.org

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,21 @@ then you can build and run the project with the following command.
2929
This section provides a high-level overview of the system's architecture: the main components and their interactions.
3030

3131
*** Main Components
32-
- =main.rs= :: The entry point of the application. Handles command-line arguments and initiates the processing workflow.
32+
- =main.rs= :: The entry point of the application. Handles command-line arguments.
33+
- =pipeline.rs= :: Orchestrates the processing workflow by calling the other modules.
3334
- =pre_processing.rs= :: Manages the initial parsing and preparation of data, utilizing memory-mapped files for efficient access.
3435
- =compute.rs= :: Contains the core logic for processing the temperature data, including calculations for min, mean, and max temperatures.
35-
- =weather.rs= :: Defines data structures and utility functions that are used throughout the application.
3636
- =aggregate.rs= :: Responsible for aggregating the results of the temperature data processing.
37+
- =weather.rs= :: Defines data structures and utility functions that are used throughout the application.
3738

3839
*** Workflow
3940
1. *Initialization*: The application starts in =main.rs=, where it parses command-line arguments to get the path of the input file.
40-
2. *Data Loading*: =pre_processing.rs= handles the loading of the input data file using memory-mapped files to efficiently manage large data volumes.
41-
3. *Data Processing*: =compute.rs= processes the loaded data, calculating the required statistics (min, mean, max) for each weather station.
42-
4. *Aggregation*: =aggregate.rs= aggregates the computed results for final output.
43-
5. *Output*: Results are then output in the format specified by the challenge requirements.
41+
2. *Orchestration*: =pipeline.rs= sets up the workflow by calling other modules.
42+
3. *Data Loading*: =pre_processing.rs= handles the loading of the input data file using memory-mapped files to efficiently manage large data volumes.
43+
4. *Data Processing*: =compute.rs= processes the loaded data, calculating the required statistics (min, mean, max) for each weather station.
44+
5. *Aggregation*: =aggregate.rs= aggregates the computed results for final output.
45+
6. *Output*: Results are then output in the format specified by the challenge requirements.
4446

4547
*** Boundaries
46-
- Module Boundaries: Clear separation between data loading (=pre_processing.rs=), data processing (=compute.rs=), aggregation (=aggregate.rs=), and utility functions (=weather.rs=).
47-
- Separating I/O: The application logic off-loads work to business logic functions to keep them I/O independent. This separation ensures that the core algorithms remain focused on computation without being coupled to input/output operations, enhancing testability.
48+
- Module Boundaries: Clear separation between orchestration (=pipeline.rs=), data loading (=pre_processing.rs=), data processing (=compute.rs=), aggregation (=aggregate.rs=), and utility functions (=weather.rs=).
49+
- Separating I/O: The application logic is off-loaded to business logic functions within =pipeline.rs=, ensuring that core algorithms remain focused on computation without being coupled to input/output operations.

src/main.rs

Lines changed: 8 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,25 @@
11
mod aggregate;
22
mod compute;
3+
mod pipeline;
34
mod pre_processing;
45
mod weather;
56

6-
use memmap2::MmapOptions;
77
use std::fs::File;
88
use std::path::PathBuf;
9-
use std::sync::mpsc;
10-
use std::sync::Arc;
11-
use std::thread;
129

13-
fn main() {
10+
fn main() -> Result<(), std::io::Error> {
1411
let args = std::env::args().collect::<Vec<_>>();
1512
if args.len() < 2 {
16-
eprintln!("Usage: {} </path/to/measurements.txt>", args[0]);
13+
eprintln!("Usage: {} </path/to/measurements.txt>", &args[0]);
1714
std::process::exit(1);
1815
}
19-
let path = PathBuf::from(&args[1]);
20-
match run(path) {
21-
Ok(res) => print_results(&res),
22-
Err(RunErr::IO(e)) => eprintln!("{}", e),
23-
}
24-
}
25-
26-
fn run(path: PathBuf) -> Result<Vec<weather::Station>, RunErr> {
27-
let file = File::open(path).unwrap();
28-
let mmap = Arc::new(unsafe { MmapOptions::new().map(&file).map_err(RunErr::IO)? });
29-
let (tx, rx) = mpsc::channel();
30-
pre_processing::Partition::try_from(&*mmap as &[u8])
31-
.map_err(RunErr::IO)?
32-
.chunks
33-
.into_iter()
34-
.for_each(|chunk| {
35-
let tx = tx.clone();
36-
let mmap = Arc::clone(&mmap);
37-
thread::spawn(move || compute::stats(&mmap[chunk], tx));
38-
});
39-
drop(tx);
40-
Ok(aggregate::reduce(rx))
41-
}
42-
43-
#[derive(Debug)]
44-
enum RunErr {
45-
IO(std::io::Error),
16+
let file = File::open(PathBuf::from(&args[1]))?;
17+
let res = pipeline::run(&file)?;
18+
print_formatted(&res);
19+
Ok(())
4620
}
4721

48-
fn print_results(v: &[weather::Station]) {
22+
fn print_formatted(v: &[weather::Station]) {
4923
print!("{{");
5024
for (i, record) in v.iter().enumerate() {
5125
if i < v.len() - 1 {
@@ -56,22 +30,3 @@ fn print_results(v: &[weather::Station]) {
5630
}
5731
println!("}}")
5832
}
59-
60-
#[cfg(test)]
61-
mod test {
62-
use super::*;
63-
64-
#[test]
65-
fn integration() {
66-
let path = PathBuf::from("./data/measurements-test.txt");
67-
let actual = run(path).unwrap();
68-
let expected = vec![
69-
weather::Station::new("London", 85, 95, 180, 2),
70-
weather::Station::new("New York", 35, 150, 185, 2),
71-
weather::Station::new("Oslo", -100, 102, 2, 2),
72-
weather::Station::new("Paris", 130, 130, 130, 1),
73-
weather::Station::new("Stockholm", -5, 200, 210, 3),
74-
];
75-
assert_eq!(actual, expected);
76-
}
77-
}

src/pipeline.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
use crate::aggregate;
2+
use crate::compute;
3+
use crate::pre_processing;
4+
use crate::weather;
5+
use memmap2::Mmap;
6+
use std::fs::File;
7+
use std::sync::mpsc;
8+
use std::sync::Arc;
9+
use std::thread;
10+
11+
type Result = std::result::Result<Vec<weather::Station>, std::io::Error>;
12+
13+
pub fn run(file: &File) -> Result {
14+
let mmap = Arc::new(unsafe { Mmap::map(file)? });
15+
let (tx, rx) = mpsc::channel();
16+
pre_processing::Partition::try_from(&*mmap as &[u8])?
17+
.chunks
18+
.into_iter()
19+
.for_each(|chunk| {
20+
let tx = tx.clone();
21+
let mmap = Arc::clone(&mmap);
22+
thread::spawn(move || compute::stats(&mmap[chunk], tx));
23+
});
24+
drop(tx);
25+
Ok(aggregate::reduce(rx))
26+
}
27+
28+
#[cfg(test)]
29+
mod test {
30+
use super::*;
31+
use std::fs::File;
32+
use std::path::PathBuf;
33+
34+
#[test]
35+
fn test_run() {
36+
let file = File::open(PathBuf::from("./data/measurements-test.txt"))
37+
.expect("Test file {path} not found");
38+
let actual = run(&file).unwrap();
39+
let expected = vec![
40+
weather::Station::new("London", 85, 95, 180, 2),
41+
weather::Station::new("New York", 35, 150, 185, 2),
42+
weather::Station::new("Oslo", -100, 102, 2, 2),
43+
weather::Station::new("Paris", 130, 130, 130, 1),
44+
weather::Station::new("Stockholm", -5, 200, 210, 3),
45+
];
46+
assert_eq!(actual, expected);
47+
}
48+
}

0 commit comments

Comments
 (0)