Skip to content

Commit cb65e5b

Browse files
committed
remove zstdascii and change hyperball and sccs clis to output files so we can use named pipes
1 parent f89a10f commit cb65e5b

File tree

4 files changed

+42
-52
lines changed

4 files changed

+42
-52
lines changed

cli/src/dist/hyperball/mod.rs

Lines changed: 29 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -18,46 +18,51 @@ use webgraph::{
1818
};
1919
use webgraph_algo::distances::hyperball::HyperBallBuilder;
2020

21-
#[derive(Args, Debug, Clone, Copy)]
21+
#[derive(Args, Debug, Clone)]
2222
#[clap(group = ArgGroup::new("centralities"))]
2323
/// Centralities that can be computed with hyperball.
24-
/// The output files will be named BASENAME.<centrality_name>.
24+
/// To compress the result you can use named pipes or process substitution
25+
/// like `--harmonic=>(zstd > harmonic.zstd)`.
2526
pub struct Centralities {
2627
/// How all the centralities will be stored.
2728
#[clap(long, value_enum, default_value_t = FloatVectorFormat::Ascii)]
2829
pub fmt: FloatVectorFormat,
30+
#[clap(long)]
2931
/// How many decimal digits will be used to store centralities in text formats.
3032
pub precision: Option<usize>,
3133

32-
/// Compute the approximate sum of distances and save them as BASENAME.sum_of_distances
34+
/// Compute the approximate sum of distances and save them at the given path.
35+
#[clap(long)]
36+
pub sum_of_distances: Option<PathBuf>,
37+
/// Compute the approximate number of reachable nodes and save them at the given path.
3338
#[clap(long)]
34-
pub sum_of_distances: bool,
35-
/// Compute the approximate number of reachable nodes and save them as BASENAME.reachable_nodes
39+
pub reachable_nodes: Option<PathBuf>,
40+
/// Compute the approximate harmonic centralities and save them at the given path.
3641
#[clap(long)]
37-
pub reachable_nodes: bool,
38-
/// Compute the approximate harmonic centralities and save them as BASENAME.harmonic
42+
pub harmonic: Option<PathBuf>,
43+
/// Compute the approximate closeness centralities and save them at the given path.
3944
#[clap(long)]
40-
pub harmonic: bool,
41-
/// Compute the approximate closeness centralities and save them as BASENAME.closeness
45+
pub closeness: Option<PathBuf>,
4246
#[clap(long)]
43-
pub closeness: bool,
44-
// TODO!: discounted ?
45-
// TODO!: neighborhood_function ?
47+
/// Compute the approximate neighborhood function and save it at the given path.
48+
/// This is at most as big as the graph diameter and stores the number of
49+
/// paths of length 1, 2, ..., d.
50+
pub neighborhood_function: Option<PathBuf>,
4651
}
4752

4853
impl Centralities {
4954
pub fn should_compute_sum_of_distances(&self) -> bool {
50-
self.sum_of_distances || self.closeness
55+
self.sum_of_distances.is_some() || self.closeness.is_some()
5156
}
5257
pub fn should_compute_sum_of_inverse_distances(&self) -> bool {
53-
self.harmonic
58+
self.harmonic.is_some()
5459
}
5560
}
5661

5762
#[derive(Parser, Debug)]
5863
#[command(
5964
name = "hyperball",
60-
about = "Use hyperball to compute centralities. (WORK IN PROGRESS)",
65+
about = "Use hyperball to compute centralities.",
6166
long_about = ""
6267
)]
6368
pub struct CliArgs {
@@ -176,9 +181,8 @@ pub fn hyperball<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Resul
176181
/// here we use a macro to avoid duplicating the code, it can't be a function
177182
/// because different centralities have different return types
178183
macro_rules! store_centrality {
179-
($flag:ident, $method:ident, $extension:literal, $description:expr) => {{
180-
if args.centralities.$flag {
181-
let path = args.basename.with_extension($extension);
184+
($flag:ident, $method:ident, $description:expr) => {{
185+
if let Some(path) = args.centralities.$flag {
182186
log::info!("Saving {} to {}", $description, path.display());
183187
let value = hb.$method()?;
184188
args.centralities
@@ -188,31 +192,14 @@ pub fn hyperball<E: Endianness>(global_args: GlobalArgs, args: CliArgs) -> Resul
188192
}};
189193
}
190194

191-
// TODO: store neighborhood function
192-
193-
store_centrality!(
194-
sum_of_distances,
195-
sum_of_distances,
196-
"sum_of_distances",
197-
"sum of distances"
198-
);
199-
store_centrality!(
200-
harmonic,
201-
harmonic_centralities,
202-
"harmonic",
203-
"harmonic centralities"
204-
);
205-
store_centrality!(
206-
closeness,
207-
closeness_centrality,
208-
"closeness",
209-
"closeness centralities"
210-
);
195+
store_centrality!(sum_of_distances, sum_of_distances, "sum of distances");
196+
store_centrality!(harmonic, harmonic_centralities, "harmonic centralities");
197+
store_centrality!(closeness, closeness_centrality, "closeness centralities");
198+
store_centrality!(reachable_nodes, reachable_nodes, "reachable nodes");
211199
store_centrality!(
212-
reachable_nodes,
213-
reachable_nodes,
214-
"reachable_nodes",
215-
"reachable nodes"
200+
neighborhood_function,
201+
neighborhood_function,
202+
"neighborhood function"
216203
);
217204

218205
Ok(())

cli/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,9 @@ pub struct BatchSizeArg {
177177
#[derive(Debug, Clone, Copy, ValueEnum)]
178178
/// How to store vectors of floats.
179179
pub enum FloatVectorFormat {
180-
/// Java-compatible format: a sequence of big-endian floats (32 bits).
180+
/// Java-compatible format: a sequence of big-endian floats (32 or 64 bits).
181181
Java,
182-
/// A slice of f32 serialized using ε-serde.
182+
/// A slice of floats (32 or 64 bits) serialized using ε-serde.
183183
Epserde,
184184
/// ASCII format, one float per line.
185185
Ascii,

cli/src/sccs.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,14 @@ pub struct CliArgs {
3636
pub basename: PathBuf,
3737

3838
#[arg(short, long)]
39-
/// Compute the size of the strongly connected components.
40-
pub sizes: bool,
39+
/// The path where to save the sccs. On bash / zsh, you can compress the
40+
/// output using `--dst=>(zstd > sccs.zstd)`
41+
pub dst: PathBuf,
42+
43+
#[arg(short, long)]
44+
/// Compute the size of the strongly connected components and store them
45+
/// at the given path.
46+
pub sizes: Option<PathBuf>,
4147

4248
#[arg(short, long)]
4349
/// Renumber components in decreasing-size order (implicitly, compute sizes).
@@ -102,9 +108,6 @@ where
102108
sccs.num_components()
103109
);
104110

105-
let path = args.basename.with_extension("sccs");
106-
let sizes_path = args.basename.with_extension("sccsizes");
107-
108111
if args.renumber {
109112
log::info!("Renumbering components by decreasing size");
110113
let component_sizes = if args.num_threads == 1 {
@@ -116,15 +119,15 @@ where
116119
thread_pool.install(|| sccs.par_sort_by_size())
117120
};
118121
let max = component_sizes.first().copied();
119-
args.fmt.store_usizes(sizes_path, &component_sizes, max)?;
120-
} else if args.sizes {
122+
args.fmt.store_usizes(&args.dst, &component_sizes, max)?;
123+
} else if let Some(sizes_path) = args.sizes {
121124
log::info!("Computing the sizes of the components");
122125
let sizes = sccs.compute_sizes();
123126
args.fmt.store_usizes(sizes_path, &sizes, None)?;
124127
};
125128

126129
args.fmt
127-
.store_usizes(path, sccs.components(), Some(sccs.num_components()))?;
130+
.store_usizes(&args.dst, sccs.components(), Some(sccs.num_components()))?;
128131

129132
Ok(())
130133
}

harmonic.zstd

496 KB
Binary file not shown.

0 commit comments

Comments
 (0)