diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 05b86855875..acfbd95e862 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,6 +44,43 @@ To build all code paths, improve build times in VSCode, and prevent locking the Note: the path in `ICU4X_DATA_DIR` is relative to `provider/data/*/src/lib.rs` and it causes VSCode to build ICU4X with only the `und` locale. This reduces build times but also makes some tests fail; to run them normally, run `cargo test --all-features` on the command line. +### Building and Rebuilding Repo Data + +In the ICU4X repository, there are a few types of locale data: + +1. Test data: used for internal ICU4X development purposes only + - Downloaded data sources: `provider/source/tests/data` + - Regen: `cargo make download-repo-sources` + - Generated JSON data: `provider/source/data/debug` + - Regen: `cargo make testdata` +2. Hard-coded source data: source of truth is this repo; used by icu4x-datagen + - Segmenter TOML files: `provider/source/data/segmenter` +3. Runtime default compiled data: the `icu_*_data` crates + - Crate roots: `provider/data` + - Regen: `cargo make bakeddata` + - Regen a specific component: `cargo make bakeddata <component>` + +During development, it is often convenient to generate only a single data marker as JSON. To do this (fully offline), you can run, for example: + +```bash +$ cargo run -p icu4x-datagen \ + --no-default-features --features provider,fs_exporter \ + -- --format fs --pretty -o _debug/data \ + --cldr-root provider/source/tests/data/cldr \ + --icuexport-root provider/source/tests/data/icuexport \ + --segmenter-lstm-root provider/source/tests/data/lstm \ + --tzdb-root provider/source/tests/data/tzdb \ + --deduplication none \ + --locales ru th \ + --markers DatetimePatternsDateGregorianV1 DatetimePatternsDateBuddhistV1 +``` + +Tips: + +- Set your desired locales and data markers on the bottom two lines. 
+- To overwrite the output directory, add: `-W` +- To print verbose logs, add: `-v` + ## Contributing a Pull Request The first step is to fork the repository to your namespace and create a branch off of the `main` branch to work with. @@ -79,10 +116,12 @@ There are various files that auto-generated across the ICU4X repository. Here a need to run in order to recreate them. These files may be run in more comprehensive tests such as those included in `cargo make ci-job-test` or `cargo make ci-all`. - `cargo make testdata` - regenerates all test data in the `provider/source/debug` directory. - - `cargo make bakeddata` - regenerates baked data in the `provider/data` directory. - - `cargo make bakeddata foo` can be used to generate data in `provider/data/foo` only. + - Tip: See [Building and Rebuilding Repo Data](#building-and-rebuilding-repo-data) for additional shortcuts. +- `cargo make bakeddata` - regenerates baked data in the `provider/data` directory. + - `cargo make bakeddata foo` can be used to generate data in `provider/data/foo` only. - `cargo make generate-readmes` - generates README files according to Rust docs. Output files must be committed in git for check to pass. - `cargo make diplomat-gen` - recreates the Diplomat generated files in the `ffi/capi` directory. +- `cargo make codegen` - recreates certain Askama generated files in the `ffi/capi/src` directory based on templates in `tools/make/codegen/templates`. 
### Testing diff --git a/provider/icu4x-datagen/Cargo.toml b/provider/icu4x-datagen/Cargo.toml index d8dec531c31..19a77b27c61 100644 --- a/provider/icu4x-datagen/Cargo.toml +++ b/provider/icu4x-datagen/Cargo.toml @@ -19,7 +19,7 @@ version.workspace = true [dependencies] icu_provider = { workspace = true } icu = { workspace = true, features = ["datagen"] } -icu_provider_export = { workspace = true, features = ["rayon"] } +icu_provider_export = { workspace = true } icu_provider_source = { workspace = true, optional = true } icu_provider_registry = { workspace = true } @@ -31,7 +31,7 @@ log = { workspace = true } simple_logger = { workspace = true } [features] -default = ["use_wasm", "networking", "fs_exporter", "blob_exporter", "baked_exporter", "provider"] +default = ["use_wasm", "networking", "fs_exporter", "blob_exporter", "baked_exporter", "provider", "rayon"] provider = ["dep:icu_provider_source"] baked_exporter = ["icu_provider_export/baked_exporter"] blob_exporter = ["icu_provider_export/blob_exporter"] @@ -46,6 +46,7 @@ use_wasm = ["icu_provider_source?/use_wasm"] use_icu4c = ["icu_provider_source?/use_icu4c"] networking = ["icu_provider_source?/networking"] experimental = ["icu_provider_source?/experimental", "icu/experimental"] +rayon = ["icu_provider_export/rayon"] [package.metadata.cargo-all-features] # We don't need working CPT builders for check diff --git a/provider/icu4x-datagen/src/main.rs b/provider/icu4x-datagen/src/main.rs index 1a1fa087628..dc6cd958fd7 100644 --- a/provider/icu4x-datagen/src/main.rs +++ b/provider/icu4x-datagen/src/main.rs @@ -380,7 +380,7 @@ fn main() -> eyre::Result<()> { #[cfg(not(feature = "networking"))] (None, _) => { eyre::bail!( - "Downloading data from tags requires the `networking` Cargo feature" + "Please set --cldr-root or enable the `networking` Cargo feature" ) } }; @@ -396,7 +396,7 @@ fn main() -> eyre::Result<()> { #[cfg(not(feature = "networking"))] (None, _) => { eyre::bail!( - "Downloading data from tags 
requires the `networking` Cargo feature" + "Please set --icuexport-root or enable the `networking` Cargo feature" ) } }; @@ -412,7 +412,7 @@ fn main() -> eyre::Result<()> { #[cfg(not(feature = "networking"))] (None, _) => { eyre::bail!( - "Downloading data from tags requires the `networking` Cargo feature" + "Please set --segmenter-lstm-root or enable the `networking` Cargo feature" ) } }; @@ -428,7 +428,7 @@ fn main() -> eyre::Result<()> { #[cfg(not(feature = "networking"))] (None, _) => { eyre::bail!( - "Downloading data from tags requires the `networking` Cargo feature" + "Please set --tzdb-root or enable the `networking` Cargo feature" ) } };