Skip to content

Commit eaaa541

Browse files
committed
adding working polars command-line tool
1 parent 5d11952 commit eaaa541

File tree

6 files changed

+358
-0
lines changed

6 files changed

+358
-0
lines changed

README.md

+8
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,14 @@ pub async fn complete_prompt(prompt: &str) -> Result<String, Box<dyn std::error:
990990

991991
```
992992
993+
### Command-line Data Science with Rust (Action Items)
994+
995+
996+
1. go into `dscli`
997+
2. Figure out how to make Polars work with `linfa`
998+
3. How can I make a k-means cluster using Polars?
999+
1000+
9931001
### Containerized Actix Continuous Delivery to AWS App Runner
9941002
9951003
1. cd into `webdocker`

dscli/Cargo.toml

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[package]
2+
name = "dscli"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7+
8+
[dependencies]
9+
clap = {version="4.0.32", features=["derive"]}
10+
polars = "0.26.1"

dscli/Makefile

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Developer shortcuts for the dscli crate.
#
# None of these targets produce a file named after the target, so they are
# declared phony; otherwise a file or directory called e.g. `test` or `run`
# would make `make` consider the target up to date and skip its recipe.
.PHONY: format lint test run all

# Format the sources.
format:
	cargo fmt --quiet

# Run the clippy lints.
lint:
	cargo clippy --quiet

# Run the test suite.
test:
	cargo test --quiet

# Build and run the binary.
run:
	cargo run

# Run every step in order.
all: format lint test run

dscli/src/data/global-life-expt-2022.csv

+267
Large diffs are not rendered by default.

dscli/src/lib.rs

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
A library that introspects global life expectancy data using Polars
3+
*/
4+
5+
use polars::prelude::*;
6+
pub const CSV_FILE: &str = "src/data/global-life-expt-2022.csv";
7+
8+
//read in a csv file
9+
pub fn read_csv(path: &str) -> DataFrame {
10+
CsvReader::from_path(path).unwrap().finish().unwrap()
11+
}
12+
13+
//print "n" rows of a dataframe
14+
pub fn print_df(df: &DataFrame, n: usize) {
15+
println!("{:?}", df.head(Some(n)));
16+
}
17+
18+
//do kmeans clustering of 2018, 2019, 2020 in the dataframe
19+
//returns a dataframe with the cluster column added
20+
//TBD for next time
21+
//pub fn cluster(df: &DataFrame) -> DataFrame {

dscli/src/main.rs

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/* Life-expectancy CLI that uses Polars and Clap
2+
*/
3+
4+
use clap::Parser;
5+
use dscli::CSV_FILE;
6+
7+
#[derive(Parser)]
8+
//add extended help
9+
#[clap(
10+
version = "1.0",
11+
author = "Noah Gift",
12+
about = "A data science CLI that uses Polars and Clap",
13+
after_help = "Example: cargo run -- print --rows 3"
14+
)]
15+
struct Cli {
16+
#[clap(subcommand)]
17+
command: Option<Commands>,
18+
}
19+
20+
#[derive(Parser)]
21+
enum Commands {
22+
Print {
23+
#[clap(long, default_value = CSV_FILE)]
24+
path: String,
25+
#[clap(long, default_value = "10")]
26+
rows: usize,
27+
},
28+
}
29+
30+
fn main() {
31+
let args = Cli::parse();
32+
match args.command {
33+
Some(Commands::Print { path, rows }) => {
34+
let df = dscli::read_csv(&path);
35+
dscli::print_df(&df, rows);
36+
}
37+
None => println!("No command given"),
38+
}
39+
}

0 commit comments

Comments
 (0)