From 2bb3f46197ec0be15d5366efb1b5a2b7e5672483 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Wed, 26 Mar 2025 14:07:09 +0800 Subject: [PATCH 01/24] refactor remote function --- wren-core-py/Cargo.lock | 108 ++++++++--- wren-core-py/src/context.rs | 224 ++++++++++++++--------- wren-core-py/src/errors.rs | 2 +- wren-core-py/src/remote_functions.rs | 18 +- wren-core-py/tests/test_modeling_core.py | 4 +- 5 files changed, 244 insertions(+), 112 deletions(-) diff --git a/wren-core-py/Cargo.lock b/wren-core-py/Cargo.lock index 3c2e3f958..0fd615103 100644 --- a/wren-core-py/Cargo.lock +++ b/wren-core-py/Cargo.lock @@ -278,6 +278,7 @@ dependencies = [ "chrono", "half", "indexmap 2.8.0", + "indexmap 2.8.0", "lexical-core", "num", "serde", @@ -350,9 +351,12 @@ dependencies = [ [[package]] name = "async-compression" version = "0.4.19" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ + "bzip2", "bzip2", "flate2", "futures-core", @@ -367,8 +371,10 @@ dependencies = [ [[package]] name = "async-trait" version = "0.1.88" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", @@ -448,8 +454,10 @@ dependencies = [ [[package]] name = "blake3" version = "1.7.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" +checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" dependencies = [ "arrayref", "arrayvec", @@ -738,7 +746,8 @@ dependencies = [ [[package]] name = "datafusion" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" dependencies = [ "arrow", "arrow-ipc", @@ -746,6 +755,7 @@ dependencies = [ "async-trait", "bytes", "bzip2", + "bzip2", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -788,7 +798,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" dependencies = [ "arrow", "async-trait", @@ -807,7 +818,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" dependencies = [ "arrow", "async-trait", @@ -828,7 +840,8 @@ dependencies = [ [[package]] name = "datafusion-common" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" dependencies = [ "ahash", "arrow", @@ -837,6 +850,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.8.0", + "indexmap 2.8.0", "libc", "log", "object_store", @@ -851,7 +865,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" dependencies = [ "log", "tokio", @@ -860,13 +875,15 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", "bzip2", + "bzip2", "chrono", "datafusion-catalog", "datafusion-common", @@ -893,12 +910,14 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" [[package]] name = "datafusion-execution" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" dependencies = [ "arrow", "dashmap", @@ -916,7 +935,8 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" dependencies = [ "arrow", "chrono", @@ -927,6 +947,7 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.8.0", + "indexmap 2.8.0", "paste", "recursive", "serde_json", @@ -936,11 +957,13 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" dependencies = [ "arrow", "datafusion-common", "indexmap 2.8.0", + "indexmap 2.8.0", "itertools 0.14.0", "paste", ] @@ -948,7 +971,8 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" dependencies = [ "arrow", "arrow-buffer", @@ -976,7 +1000,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" dependencies = [ "ahash", "arrow", @@ -996,7 +1021,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" dependencies = [ "ahash", "arrow", @@ -1008,7 +1034,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" dependencies = [ "arrow", "arrow-ord", @@ -1028,7 +1055,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" dependencies = [ "arrow", "async-trait", @@ -1043,7 +1071,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1059,7 +1088,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1068,7 +1098,8 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" dependencies = [ "datafusion-expr", "quote", @@ -1078,7 +1109,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" dependencies = [ "arrow", "chrono", @@ -1086,6 +1118,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "indexmap 2.8.0", + "indexmap 2.8.0", "itertools 0.14.0", "log", "recursive", @@ -1096,7 +1129,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" dependencies = [ "ahash", "arrow", @@ -1108,6 +1142,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.8.0", + "indexmap 2.8.0", "itertools 0.14.0", "log", "paste", @@ -1117,7 +1152,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" dependencies = [ "ahash", "arrow", @@ -1130,7 +1166,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" dependencies = [ "arrow", "datafusion-common", @@ -1148,7 +1185,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" dependencies = [ "ahash", "arrow", @@ -1167,6 +1205,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.8.0", + "indexmap 2.8.0", "itertools 0.14.0", "log", "parking_lot", @@ -1177,13 +1216,15 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "46.0.1" -source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" dependencies = [ "arrow", "bigdecimal", "datafusion-common", "datafusion-expr", "indexmap 2.8.0", + "indexmap 2.8.0", "log", "recursive", "regex", @@ -1457,8 +1498,10 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "half" version = "2.5.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" +checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" dependencies = [ "cfg-if", "crunchy", @@ -1687,8 +1730,10 @@ dependencies = [ [[package]] name = "indexmap" version = "2.8.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" +checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -1829,8 +1874,10 @@ dependencies = [ [[package]] name = "libc" version = "0.2.171" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libm" @@ -2134,6 +2181,7 @@ checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", "indexmap 2.8.0", + "indexmap 2.8.0", ] [[package]] @@ -2316,8 +2364,10 @@ dependencies = [ [[package]] name = "quote" version = "1.0.40" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -2556,6 +2606,7 @@ dependencies = [ "hex", "indexmap 1.9.3", "indexmap 2.8.0", + "indexmap 2.8.0", "serde", "serde_derive", "serde_json", @@ -2727,8 +2778,10 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" version = "2.0.100" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -2850,8 +2903,10 @@ dependencies = [ [[package]] name = "tokio" version = "1.44.1" +version = "1.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" +checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" dependencies = [ "backtrace", "bytes", @@ -2873,8 +2928,10 @@ dependencies = [ [[package]] name = "tokio-util" version = "0.7.14" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" +checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" dependencies = [ "bytes", "futures-core", @@ -2895,6 +2952,7 @@ version = "0.22.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee" dependencies = [ + "indexmap 2.8.0", "indexmap 2.8.0", "toml_datetime", "winnow", @@ -3003,8 +3061,10 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" version = "1.16.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "getrandom 0.3.1", "js-sys", diff --git a/wren-core-py/src/context.rs b/wren-core-py/src/context.rs index fae3f88d6..9f2a5f9e2 100644 --- a/wren-core-py/src/context.rs +++ b/wren-core-py/src/context.rs @@ -20,11 +20,13 @@ use crate::manifest::to_manifest; use crate::remote_functions::PyRemoteFunction; use log::debug; use pyo3::{pyclass, pymethods, PyErr, PyResult}; -use std::collections::hash_map::Entry; -use std::collections::HashMap; use std::hash::Hash; use std::ops::ControlFlow; +use std::str::FromStr; use std::sync::Arc; +use std::vec; +use tokio::runtime::Runtime; +use wren_core::array::{AsArray, GenericListArray}; use wren_core::ast::{visit_statements_mut, Expr, Statement, Value}; use wren_core::dialect::GenericDialect; use wren_core::logical_plan::utils::map_data_type; @@ -33,20 +35,22 @@ use wren_core::mdl::function::{ ByPassAggregateUDF, ByPassScalarUDF, ByPassWindowFunction, FunctionType, RemoteFunction, }; -use wren_core::{mdl, AggregateUDF, AnalyzedWrenMDL, ScalarUDF, WindowUDF}; +use wren_core::{ + mdl, AggregateUDF, AnalyzedWrenMDL, ScalarUDF, SessionConfig, WindowUDF, +}; + /// The Python wrapper for the Wren Core session context. #[pyclass(name = "SessionContext")] #[derive(Clone)] pub struct PySessionContext { ctx: wren_core::SessionContext, mdl: Arc, - remote_functions: Vec, + runtime: Arc, } impl Hash for PySessionContext { fn hash(&self, state: &mut H) { self.mdl.hash(state); - self.remote_functions.hash(state); } } @@ -55,7 +59,7 @@ impl Default for PySessionContext { Self { ctx: wren_core::SessionContext::new(), mdl: Arc::new(AnalyzedWrenMDL::default()), - remote_functions: vec![], + runtime: Arc::new(Runtime::new().unwrap()), } } } @@ -79,13 +83,34 @@ impl PySessionContext { .map(|f| f.into()) .collect::>(); - let ctx = wren_core::SessionContext::new(); + let config = SessionConfig::default().with_information_schema(true); + let ctx = wren_core::SessionContext::new_with_config(config); + let runtime = Runtime::new().map_err(CoreError::from)?; + + let registered_functions = runtime + .block_on(Self::get_regietered_functions(&ctx)) + .map(|functions| { + functions + .into_iter() + .map(|f| f.name) + .collect::>() + }) + .map_err(CoreError::from)?; + + remote_functions.into_iter().try_for_each(|remote_function| { + debug!("Registering remote function: {:?}", remote_function); + // TODO: check not only the name but also the return type and the parameter types + if !registered_functions.contains(&remote_function.name) { + Self::register_remote_function(&ctx, remote_function)?; + } + Ok::<(), CoreError>(()) + })?; let Some(mdl_base64) = mdl_base64 else { return Ok(Self { ctx, mdl: Arc::new(AnalyzedWrenMDL::default()), - remote_functions, + runtime: Arc::new(runtime), }); }; @@ -97,98 +122,39 @@ impl PySessionContext { let analyzed_mdl = Arc::new(analyzed_mdl); - let runtime = tokio::runtime::Runtime::new().map_err(CoreError::from)?; let ctx = runtime .block_on(create_ctx_with_mdl(&ctx, Arc::clone(&analyzed_mdl), false)) .map_err(CoreError::from)?; - remote_functions.iter().try_for_each(|remote_function| { - debug!("Registering remote function: {:?}", remote_function); - Self::register_remote_function(&ctx, remote_function)?; - Ok::<(), CoreError>(()) - })?; - Ok(Self { ctx, mdl: analyzed_mdl, - remote_functions, + runtime: Arc::new(runtime), }) } /// Transform the given Wren SQL to the equivalent Planned SQL. pub fn transform_sql(&self, sql: &str) -> PyResult { - mdl::transform_sql(Arc::clone(&self.mdl), &self.remote_functions, sql) + self.runtime + .block_on(mdl::transform_sql_with_ctx( + &self.ctx, + Arc::clone(&self.mdl), + &[], + sql, + )) .map_err(|e| PyErr::from(CoreError::from(e))) } /// Get the available functions in the session context. pub fn get_available_functions(&self) -> PyResult> { - let mut builder = self - .remote_functions - .iter() - .map(|f| (f.name.clone(), f.clone().into())) - .collect::>(); - self.ctx - .state() - .scalar_functions() - .iter() - .for_each(|(name, _func)| { - match builder.entry(name.clone()) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { - entry.insert(PyRemoteFunction { - function_type: "scalar".to_string(), - name: name.clone(), - // TODO: get function return type from SessionState - return_type: None, - param_names: None, - param_types: None, - description: None, - }); - } - } - }); - self.ctx - .state() - .aggregate_functions() - .iter() - .for_each(|(name, _func)| { - match builder.entry(name.clone()) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { - entry.insert(PyRemoteFunction { - function_type: "aggregate".to_string(), - name: name.clone(), - // TODO: get function return type from SessionState - return_type: None, - param_names: None, - param_types: None, - description: None, - }); - } - } - }); - self.ctx - .state() - .window_functions() - .iter() - .for_each(|(name, _func)| { - match builder.entry(name.clone()) { - Entry::Occupied(_) => {} - Entry::Vacant(entry) => { - entry.insert(PyRemoteFunction { - function_type: "window".to_string(), - name: name.clone(), - // TODO: get function return type from SessionState - return_type: None, - param_names: None, - param_types: None, - description: None, - }); - } - } - }); - Ok(builder.values().cloned().collect()) + let registered_functions: Vec = self + .runtime + .block_on(Self::get_regietered_functions(&self.ctx)) + .map_err(CoreError::from)? + .into_iter() + .map(|f| f.into()) + .collect::>(); + Ok(registered_functions) } /// Push down the limit to the given SQL. @@ -232,7 +198,7 @@ impl PySessionContext { impl PySessionContext { fn register_remote_function( ctx: &wren_core::SessionContext, - remote_function: &RemoteFunction, + remote_function: RemoteFunction, ) -> PyResult<()> { match &remote_function.function_type { FunctionType::Scalar => { @@ -240,6 +206,7 @@ impl PySessionContext { &remote_function.name, map_data_type(&remote_function.return_type) .map_err(CoreError::from)?, + remote_function.description, ))) } FunctionType::Aggregate => { @@ -247,6 +214,7 @@ impl PySessionContext { &remote_function.name, map_data_type(&remote_function.return_type) .map_err(CoreError::from)?, + remote_function.description, ))) } FunctionType::Window => { @@ -254,6 +222,7 @@ impl PySessionContext { &remote_function.name, map_data_type(&remote_function.return_type) .map_err(CoreError::from)?, + remote_function.description, ))) } } @@ -275,4 +244,91 @@ impl PySessionContext { Ok(vec![]) } } + + async fn get_regietered_functions( + ctx: &wren_core::SessionContext, + ) -> PyResult> { + let sql = r#" + WITH inputs AS ( + SELECT + r.specific_name, + r.data_type as return_type, + pi.rid, + array_agg(pi.parameter_name order by pi.ordinal_position) as param_names, + array_agg(pi.data_type order by pi.ordinal_position) as param_types + FROM + information_schema.routines r + JOIN + information_schema.parameters pi ON r.specific_name = pi.specific_name AND pi.parameter_mode = 'IN' + GROUP BY 1, 2, 3 + ) + SELECT + r.routine_name as name, + i.param_names, + i.param_types, + r.data_type as return_type, + r.function_type, + r.description + FROM + information_schema.routines r + LEFT JOIN + inputs i ON r.specific_name = i.specific_name + "#; + let batches = ctx + .sql(sql) + .await + .map_err(CoreError::from)? + .collect() + .await + .map_err(CoreError::from)?; + let mut functions = vec![]; + + for batch in batches { + let name_array = batch.column(0).as_string::(); + let param_names_array = batch.column(1).as_list::(); + let param_types_array = batch.column(2).as_list::(); + let return_type_array = batch.column(3).as_string::(); + let function_type_array = batch.column(4).as_string::(); + let description_array = batch.column(5).as_string::(); + + for row in 0..batch.num_rows() { + let name = name_array.value(row).to_string(); + let param_names = Self::to_string_vec(param_names_array); + let param_types = Self::to_string_vec(param_types_array); + let return_type = return_type_array.value(row).to_string(); + let description = description_array.value(row).to_string(); + let function_type = function_type_array.value(row).to_string(); + + functions.push(RemoteFunction { + name, + param_names: Some(param_names), + param_types: Some(param_types), + return_type: return_type, + description: Some(description), + function_type: FunctionType::from_str(&function_type).unwrap(), + }); + } + } + Ok(functions) + } + + fn to_string_vec(array: &GenericListArray) -> Vec { + array + .iter() + .find_map(|list| match list { + Some(list) => Some( + list.as_string::() + .iter() + .map(|s| match s { + Some(s) => s.to_string(), + None => "".to_string(), + }) + .collect::>(), + ), + None => None, + }) + .into_iter() + .flatten() + .collect::>() + } } diff --git a/wren-core-py/src/errors.rs b/wren-core-py/src/errors.rs index b6196d5c4..f5732c276 100644 --- a/wren-core-py/src/errors.rs +++ b/wren-core-py/src/errors.rs @@ -1,7 +1,7 @@ -use std::num::ParseIntError; use base64::DecodeError; use pyo3::exceptions::PyException; use pyo3::PyErr; +use std::num::ParseIntError; use std::string::FromUtf8Error; use thiserror::Error; diff --git a/wren-core-py/src/remote_functions.rs b/wren-core-py/src/remote_functions.rs index 552b94242..c090f3b58 100644 --- a/wren-core-py/src/remote_functions.rs +++ b/wren-core-py/src/remote_functions.rs @@ -19,11 +19,12 @@ use pyo3::prelude::PyDictMethods; use pyo3::types::PyDict; use pyo3::{pyclass, pymethods, PyObject, Python}; use serde::{Deserialize, Serialize}; +use std::fmt::Display; use std::str::FromStr; use wren_core::mdl::function::FunctionType; #[pyclass(name = "RemoteFunction")] -#[derive(Serialize, Deserialize, Clone)] +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Hash, Debug)] pub struct PyRemoteFunction { #[pyo3(get)] pub function_type: String, @@ -115,3 +116,18 @@ impl From for wren_core::mdl::function::RemoteFunction { } } } + +impl Display for PyRemoteFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "RemoteFunction {{ function_type: {}, name: {}, return_type: {:?}, param_names: {:?}, param_types: {:?}, description: {:?} }}", + self.function_type, + self.name, + self.return_type, + self.param_names, + self.param_types, + self.description + ) + } +} diff --git a/wren-core-py/tests/test_modeling_core.py b/wren-core-py/tests/test_modeling_core.py index 81d54a692..8820ce62a 100644 --- a/wren-core-py/tests/test_modeling_core.py +++ b/wren-core-py/tests/test_modeling_core.py @@ -106,7 +106,7 @@ def test_read_function_list(): path = "tests/functions.csv" session_context = SessionContext(manifest_str, path) functions = session_context.get_available_functions() - assert len(functions) == 275 + assert len(functions) == 25943 rewritten_sql = session_context.transform_sql( "SELECT add_two(c_custkey) FROM my_catalog.my_schema.customer" @@ -118,7 +118,7 @@ def test_read_function_list(): session_context = SessionContext(manifest_str, None) functions = session_context.get_available_functions() - assert len(functions) == 273 + assert len(functions) == 25941 def test_get_available_functions(): From 2f995f2cbb6df1c6e0544e28547b594c69c61fac Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Wed, 26 Mar 2025 14:08:05 +0800 Subject: [PATCH 02/24] add description to by pass function --- wren-core/core/src/lib.rs | 3 ++- wren-core/core/src/mdl/function.rs | 41 +++++++++++++++++++++++------- wren-core/core/src/mdl/mod.rs | 3 +++ 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/wren-core/core/src/lib.rs b/wren-core/core/src/lib.rs index b0686bb75..3513942f5 100644 --- a/wren-core/core/src/lib.rs +++ b/wren-core/core/src/lib.rs @@ -3,6 +3,7 @@ pub mod mdl; pub use datafusion::error::DataFusionError; pub use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; -pub use datafusion::prelude::SessionContext; +pub use datafusion::prelude::*; pub use datafusion::sql::sqlparser::*; +pub use datafusion::arrow::*; pub use mdl::AnalyzedWrenMDL; diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 00f4e99f0..54917c923 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -5,8 +5,7 @@ use datafusion::logical_expr::function::{ AccumulatorArgs, PartitionEvaluatorArgs, WindowUDFFieldArgs, }; use datafusion::logical_expr::{ - Accumulator, AggregateUDFImpl, ColumnarValue, PartitionEvaluator, ScalarUDFImpl, - Signature, TypeSignature, Volatility, WindowUDFImpl, + Accumulator, AggregateUDFImpl, ColumnarValue, DocSection, Documentation, DocumentationBuilder, PartitionEvaluator, ScalarUDFImpl, Signature, TypeSignature, Volatility, WindowUDFImpl }; use serde::{Deserialize, Serialize}; use std::any::Any; @@ -46,7 +45,7 @@ impl FromStr for FunctionType { type Err = String; fn from_str(s: &str) -> Result { - match s { + match s.to_lowercase().as_str() { "scalar" => Ok(FunctionType::Scalar), "aggregate" => Ok(FunctionType::Aggregate), "window" => Ok(FunctionType::Window), @@ -63,10 +62,12 @@ pub struct ByPassScalarUDF { name: String, return_type: DataType, signature: Signature, + doc: Documentation, } impl ByPassScalarUDF { - pub fn new(name: &str, return_type: DataType) -> Self { + pub fn new(name: &str, return_type: DataType, description: Option) -> Self { + let doc= DocumentationBuilder::new_with_details(DocSection::default(), description.unwrap_or("".to_string()), "").build(); Self { name: name.to_string(), return_type, @@ -74,6 +75,7 @@ impl ByPassScalarUDF { vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Volatile, ), + doc, } } } @@ -98,6 +100,10 @@ impl ScalarUDFImpl for ByPassScalarUDF { fn invoke(&self, _args: &[ColumnarValue]) -> Result { internal_err!("This function should not be called") } + + fn documentation(&self) -> Option<&Documentation> { + Some(&self.doc) + } } /// An aggregate UDF that will be bypassed when planning logical plan. @@ -107,10 +113,12 @@ pub struct ByPassAggregateUDF { name: String, return_type: DataType, signature: Signature, + doc: Documentation, } impl ByPassAggregateUDF { - pub fn new(name: &str, return_type: DataType) -> Self { + pub fn new(name: &str, return_type: DataType, description: Option) -> Self { + let doc= DocumentationBuilder::new_with_details(DocSection::default(), description.unwrap_or("".to_string()), "").build(); Self { name: name.to_string(), return_type, @@ -118,6 +126,7 @@ impl ByPassAggregateUDF { vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Volatile, ), + doc, } } } @@ -142,6 +151,10 @@ impl AggregateUDFImpl for ByPassAggregateUDF { fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { internal_err!("This function should not be called") } + + fn documentation(&self) -> Option<&Documentation> { + Some(&self.doc) + } } /// A window UDF that will be bypassed when planning logical plan. @@ -151,10 +164,12 @@ pub struct ByPassWindowFunction { name: String, return_type: DataType, signature: Signature, + doc: Documentation, } impl ByPassWindowFunction { - pub fn new(name: &str, return_type: DataType) -> Self { + pub fn new(name: &str, return_type: DataType, description: Option) -> Self { + let doc= DocumentationBuilder::new_with_details(DocSection::default(), description.unwrap_or("".to_string()), "").build(); Self { name: name.to_string(), return_type, @@ -162,6 +177,7 @@ impl ByPassWindowFunction { vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Volatile, ), + doc, } } } @@ -193,6 +209,10 @@ impl WindowUDFImpl for ByPassWindowFunction { false, )) } + + fn documentation(&self) -> Option<&Documentation> { + Some(&self.doc) + } } #[cfg(test)] @@ -207,7 +227,7 @@ mod test { #[tokio::test] async fn test_by_pass_scalar_udf() -> Result<()> { - let udf = ByPassScalarUDF::new("date_diff", DataType::Int64); + let udf = ByPassScalarUDF::new("date_diff", DataType::Int64, None); let ctx = SessionContext::new(); ctx.register_udf(ScalarUDF::new_from_impl(udf)); @@ -221,6 +241,7 @@ mod test { ctx.register_udf(ScalarUDF::new_from_impl(ByPassScalarUDF::new( "today", DataType::Utf8, + None, ))); let plan_2 = ctx.sql("SELECT today()").await?.into_unoptimized_plan(); assert_eq!(format!("{plan_2}"), "Projection: today()\n EmptyRelation"); @@ -230,7 +251,7 @@ mod test { #[tokio::test] async fn test_by_pass_agg_udf() -> Result<()> { - let udf = ByPassAggregateUDF::new("count_self", DataType::Int64); + let udf = ByPassAggregateUDF::new("count_self", DataType::Int64, None); let ctx = SessionContext::new(); ctx.register_udaf(AggregateUDF::new_from_impl(udf)); @@ -245,6 +266,7 @@ mod test { ctx.register_udaf(AggregateUDF::new_from_impl(ByPassAggregateUDF::new( "total_count", DataType::Int64, + None, ))); let plan_2 = ctx .sql("SELECT total_count() AS total_count FROM (VALUES (1), (2), (3)) AS val(x)") @@ -263,7 +285,7 @@ mod test { #[tokio::test] async fn test_by_pass_window_udf() -> Result<()> { - let udf = ByPassWindowFunction::new("custom_window", DataType::Int64); + let udf = ByPassWindowFunction::new("custom_window", DataType::Int64, None); let ctx = SessionContext::new(); ctx.register_udwf(WindowUDF::new_from_impl(udf)); @@ -279,6 +301,7 @@ mod test { ctx.register_udwf(WindowUDF::new_from_impl(ByPassWindowFunction::new( "cume_dist", DataType::Int64, + None, ))); let plan_2 = ctx .sql("SELECT cume_dist() OVER ()") diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index 37953692d..84820ae3c 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -389,18 +389,21 @@ fn register_remote_function( ctx.register_udf(ScalarUDF::new_from_impl(ByPassScalarUDF::new( &remote_function.name, map_data_type(&remote_function.return_type)?, + remote_function.description.clone(), ))) } FunctionType::Aggregate => { ctx.register_udaf(AggregateUDF::new_from_impl(ByPassAggregateUDF::new( &remote_function.name, map_data_type(&remote_function.return_type)?, + remote_function.description.clone(), ))) } FunctionType::Window => { ctx.register_udwf(WindowUDF::new_from_impl(ByPassWindowFunction::new( &remote_function.name, map_data_type(&remote_function.return_type)?, + remote_function.description.clone(), ))) } }; From 5b42904c1eb670e9526a766a3716163e29858779 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Thu, 27 Mar 2025 16:22:43 +0800 Subject: [PATCH 03/24] parse the parameter type --- wren-core-py/src/context.rs | 56 +++++------ wren-core/core/src/lib.rs | 2 +- wren-core/core/src/mdl/function.rs | 146 ++++++++++++++++++++++++----- wren-core/core/src/mdl/mod.rs | 3 - 4 files changed, 145 insertions(+), 62 deletions(-) diff --git a/wren-core-py/src/context.rs b/wren-core-py/src/context.rs index 9f2a5f9e2..b90743ac1 100644 --- a/wren-core-py/src/context.rs +++ b/wren-core-py/src/context.rs @@ -29,7 +29,6 @@ use tokio::runtime::Runtime; use wren_core::array::{AsArray, GenericListArray}; use wren_core::ast::{visit_statements_mut, Expr, Statement, Value}; use wren_core::dialect::GenericDialect; -use wren_core::logical_plan::utils::map_data_type; use wren_core::mdl::context::create_ctx_with_mdl; use wren_core::mdl::function::{ ByPassAggregateUDF, ByPassScalarUDF, ByPassWindowFunction, FunctionType, @@ -97,14 +96,16 @@ impl PySessionContext { }) .map_err(CoreError::from)?; - remote_functions.into_iter().try_for_each(|remote_function| { - debug!("Registering remote function: {:?}", remote_function); - // TODO: check not only the name but also the return type and the parameter types - if !registered_functions.contains(&remote_function.name) { - Self::register_remote_function(&ctx, remote_function)?; - } - Ok::<(), CoreError>(()) - })?; + remote_functions + .into_iter() + .try_for_each(|remote_function| { + debug!("Registering remote function: {:?}", remote_function); + // TODO: check not only the name but also the return type and the parameter types + if !registered_functions.contains(&remote_function.name) { + Self::register_remote_function(&ctx, remote_function)?; + } + Ok::<(), CoreError>(()) + })?; let Some(mdl_base64) = mdl_base64 else { return Ok(Self { @@ -180,7 +181,7 @@ impl PySessionContext { if n.parse::().unwrap() > pushdown { q.limit = Some(Expr::Value(Value::Number( pushdown.to_string(), - is.clone(), + *is, ))); } } @@ -202,28 +203,16 @@ impl PySessionContext { ) -> PyResult<()> { match &remote_function.function_type { FunctionType::Scalar => { - ctx.register_udf(ScalarUDF::new_from_impl(ByPassScalarUDF::new( - &remote_function.name, - map_data_type(&remote_function.return_type) - .map_err(CoreError::from)?, - remote_function.description, - ))) + let func: ByPassScalarUDF = remote_function.into(); + ctx.register_udf(ScalarUDF::new_from_impl(func)) } FunctionType::Aggregate => { - ctx.register_udaf(AggregateUDF::new_from_impl(ByPassAggregateUDF::new( - &remote_function.name, - map_data_type(&remote_function.return_type) - .map_err(CoreError::from)?, - remote_function.description, - ))) + let func: ByPassAggregateUDF = remote_function.into(); + ctx.register_udaf(AggregateUDF::new_from_impl(func)) } FunctionType::Window => { - ctx.register_udwf(WindowUDF::new_from_impl(ByPassWindowFunction::new( - &remote_function.name, - map_data_type(&remote_function.return_type) - .map_err(CoreError::from)?, - remote_function.description, - ))) + let func: ByPassWindowFunction = remote_function.into(); + ctx.register_udwf(WindowUDF::new_from_impl(func)) } } Ok(()) @@ -303,7 +292,7 @@ impl PySessionContext { name, param_names: Some(param_names), param_types: Some(param_types), - return_type: return_type, + return_type, description: Some(description), function_type: FunctionType::from_str(&function_type).unwrap(), }); @@ -315,17 +304,16 @@ impl PySessionContext { fn to_string_vec(array: &GenericListArray) -> Vec { array .iter() - .find_map(|list| match list { - Some(list) => Some( + .find_map(|list| { + list.map(|list| { list.as_string::() .iter() .map(|s| match s { Some(s) => s.to_string(), None => "".to_string(), }) - .collect::>(), - ), - None => None, + .collect::>() + }) }) .into_iter() .flatten() diff --git a/wren-core/core/src/lib.rs b/wren-core/core/src/lib.rs index 3513942f5..2e2985775 100644 --- a/wren-core/core/src/lib.rs +++ b/wren-core/core/src/lib.rs @@ -1,9 +1,9 @@ pub mod logical_plan; pub mod mdl; +pub use datafusion::arrow::*; pub use datafusion::error::DataFusionError; pub use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; pub use datafusion::prelude::*; pub use datafusion::sql::sqlparser::*; -pub use datafusion::arrow::*; pub use mdl::AnalyzedWrenMDL; diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 54917c923..21f6c16cb 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -5,7 +5,9 @@ use datafusion::logical_expr::function::{ AccumulatorArgs, PartitionEvaluatorArgs, WindowUDFFieldArgs, }; use datafusion::logical_expr::{ - Accumulator, AggregateUDFImpl, ColumnarValue, DocSection, Documentation, DocumentationBuilder, PartitionEvaluator, ScalarUDFImpl, Signature, TypeSignature, Volatility, WindowUDFImpl + Accumulator, AggregateUDFImpl, ColumnarValue, DocSection, Documentation, + DocumentationBuilder, PartitionEvaluator, ScalarUDFImpl, Signature, TypeSignature, + Volatility, WindowUDFImpl, }; use serde::{Deserialize, Serialize}; use std::any::Any; @@ -22,7 +24,35 @@ pub struct RemoteFunction { pub description: Option, } -#[derive(Serialize, Deserialize, Debug, Clone, Hash)] +impl RemoteFunction { + pub fn get_signature(&self) -> Signature { + let mut signatures = vec![]; + if let Some(param_types) = &self.param_types { + if let Some(types) = Self::transform_param_type(param_types.as_slice()) { + signatures.push(TypeSignature::Exact(types)); + } + } + // If the function has no siganture, we will add two default signatures: nullary and variadic any + if signatures.is_empty() { + signatures.push(TypeSignature::Nullary); + signatures.push(TypeSignature::VariadicAny); + } + Signature::one_of(signatures, Volatility::Volatile) + } + + fn transform_param_type(param_types: &[String]) -> Option> { + let types = param_types + .iter() + .map(|t| DataType::from_str(t.as_str()).ok()) + .collect::>(); + if types.iter().any(|x| x.is_none()) { + return None; + } + Some(types.into_iter().map(|x| x.unwrap().clone()).collect()) + } +} + +#[derive(Serialize, Deserialize, Debug, Clone, Hash, PartialEq)] #[serde(rename_all = "lowercase")] pub enum FunctionType { Scalar, @@ -62,20 +92,43 @@ pub struct ByPassScalarUDF { name: String, return_type: DataType, signature: Signature, - doc: Documentation, + doc: Option, } impl ByPassScalarUDF { - pub fn new(name: &str, return_type: DataType, description: Option) -> Self { - let doc= DocumentationBuilder::new_with_details(DocSection::default(), description.unwrap_or("".to_string()), "").build(); + pub fn new(name: &str, return_type: DataType) -> Self { Self { name: name.to_string(), return_type, signature: Signature::one_of( - vec![TypeSignature::VariadicAny, TypeSignature::Nullary], + vec![TypeSignature::Nullary, TypeSignature::VariadicAny], Volatility::Volatile, ), - doc, + doc: None, + } + } +} + +impl From for ByPassScalarUDF { + fn from(func: RemoteFunction) -> Self { + let return_type = DataType::from_str(func.return_type.as_str()).unwrap(); + let mut builder = DocumentationBuilder::new_with_details( + DocSection::default(), + func.description.clone().unwrap_or("".to_string()), + "", + ); + let signature = func.get_signature(); + if let Some(param_names) = func.param_names.as_ref() { + for (i, name) in param_names.iter().enumerate() { + builder = builder + .with_argument(name, func.param_types.as_ref().unwrap()[i].as_str()); + } + } + ByPassScalarUDF { + name: func.name, + return_type, + signature, + doc: Some(builder.build()), } } } @@ -102,7 +155,7 @@ impl ScalarUDFImpl for ByPassScalarUDF { } fn documentation(&self) -> Option<&Documentation> { - Some(&self.doc) + self.doc.as_ref() } } @@ -113,12 +166,11 @@ pub struct ByPassAggregateUDF { name: String, return_type: DataType, signature: Signature, - doc: Documentation, + doc: Option, } impl ByPassAggregateUDF { - pub fn new(name: &str, return_type: DataType, description: Option) -> Self { - let doc= DocumentationBuilder::new_with_details(DocSection::default(), description.unwrap_or("".to_string()), "").build(); + pub fn new(name: &str, return_type: DataType) -> Self { Self { name: name.to_string(), return_type, @@ -126,7 +178,32 @@ impl ByPassAggregateUDF { vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Volatile, ), - doc, + doc: None, + } + } +} + +impl From for ByPassAggregateUDF { + fn from(func: RemoteFunction) -> Self { + let return_type = DataType::from_str(func.return_type.as_str()).unwrap(); + let mut builder = DocumentationBuilder::new_with_details( + DocSection::default(), + func.description.clone().unwrap_or("".to_string()), + "", + ); + let signature = func.get_signature(); + if let Some(param_names) = func.param_names.as_ref() { + for (i, name) in param_names.iter().enumerate() { + builder = builder + .with_argument(name, func.param_types.as_ref().unwrap()[i].as_str()); + } + } + + ByPassAggregateUDF { + name: func.name, + return_type, + signature, + doc: Some(builder.build()), } } } @@ -153,7 +230,7 @@ impl AggregateUDFImpl for ByPassAggregateUDF { } fn documentation(&self) -> Option<&Documentation> { - Some(&self.doc) + self.doc.as_ref() } } @@ -164,12 +241,11 @@ pub struct ByPassWindowFunction { name: String, return_type: DataType, signature: Signature, - doc: Documentation, + doc: Option, } impl ByPassWindowFunction { - pub fn new(name: &str, return_type: DataType, description: Option) -> Self { - let doc= DocumentationBuilder::new_with_details(DocSection::default(), description.unwrap_or("".to_string()), "").build(); + pub fn new(name: &str, return_type: DataType) -> Self { Self { name: name.to_string(), return_type, @@ -177,7 +253,32 @@ impl ByPassWindowFunction { vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Volatile, ), - doc, + doc: None, + } + } +} + +impl From for ByPassWindowFunction { + fn from(func: RemoteFunction) -> Self { + let return_type = DataType::from_str(func.return_type.as_str()).unwrap(); + let mut builder = DocumentationBuilder::new_with_details( + DocSection::default(), + func.description.clone().unwrap_or("".to_string()), + "", + ); + let signature = func.get_signature(); + if let Some(param_names) = func.param_names.as_ref() { + for (i, name) in param_names.iter().enumerate() { + builder = builder + .with_argument(name, func.param_types.as_ref().unwrap()[i].as_str()); + } + } + + ByPassWindowFunction { + name: func.name, + return_type, + signature, + doc: Some(builder.build()), } } } @@ -211,7 +312,7 @@ impl WindowUDFImpl for ByPassWindowFunction { } fn documentation(&self) -> Option<&Documentation> { - Some(&self.doc) + self.doc.as_ref() } } @@ -227,7 +328,7 @@ mod test { #[tokio::test] async fn test_by_pass_scalar_udf() -> Result<()> { - let udf = ByPassScalarUDF::new("date_diff", DataType::Int64, None); + let udf = ByPassScalarUDF::new("date_diff", DataType::Int64); let ctx = SessionContext::new(); ctx.register_udf(ScalarUDF::new_from_impl(udf)); @@ -241,7 +342,6 @@ mod test { ctx.register_udf(ScalarUDF::new_from_impl(ByPassScalarUDF::new( "today", DataType::Utf8, - None, ))); let plan_2 = ctx.sql("SELECT today()").await?.into_unoptimized_plan(); assert_eq!(format!("{plan_2}"), "Projection: today()\n EmptyRelation"); @@ -251,7 +351,7 @@ mod test { #[tokio::test] async fn test_by_pass_agg_udf() -> Result<()> { - let udf = ByPassAggregateUDF::new("count_self", DataType::Int64, None); + let udf = ByPassAggregateUDF::new("count_self", DataType::Int64); let ctx = SessionContext::new(); ctx.register_udaf(AggregateUDF::new_from_impl(udf)); @@ -266,7 +366,6 @@ mod test { ctx.register_udaf(AggregateUDF::new_from_impl(ByPassAggregateUDF::new( "total_count", DataType::Int64, - None, ))); let plan_2 = ctx .sql("SELECT total_count() AS total_count FROM (VALUES (1), (2), (3)) AS val(x)") @@ -285,7 +384,7 @@ mod test { #[tokio::test] async fn test_by_pass_window_udf() -> Result<()> { - let udf = ByPassWindowFunction::new("custom_window", DataType::Int64, None); + let udf = ByPassWindowFunction::new("custom_window", DataType::Int64); let ctx = SessionContext::new(); ctx.register_udwf(WindowUDF::new_from_impl(udf)); @@ -301,7 +400,6 @@ mod test { ctx.register_udwf(WindowUDF::new_from_impl(ByPassWindowFunction::new( "cume_dist", DataType::Int64, - None, ))); let plan_2 = ctx .sql("SELECT cume_dist() OVER ()") diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index 84820ae3c..37953692d 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -389,21 +389,18 @@ fn register_remote_function( ctx.register_udf(ScalarUDF::new_from_impl(ByPassScalarUDF::new( &remote_function.name, map_data_type(&remote_function.return_type)?, - remote_function.description.clone(), ))) } FunctionType::Aggregate => { ctx.register_udaf(AggregateUDF::new_from_impl(ByPassAggregateUDF::new( &remote_function.name, map_data_type(&remote_function.return_type)?, - remote_function.description.clone(), ))) } FunctionType::Window => { ctx.register_udwf(WindowUDF::new_from_impl(ByPassWindowFunction::new( &remote_function.name, map_data_type(&remote_function.return_type)?, - remote_function.description.clone(), ))) } }; From 85d86b2cccb6cbb51177a49aa3b06ce79e63700c Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Thu, 27 Mar 2025 17:17:16 +0800 Subject: [PATCH 04/24] use map_data_type to handle type name --- .../core/src/logical_plan/analyze/plan.rs | 12 +++---- wren-core/core/src/logical_plan/utils.rs | 36 ++++++++++++------- wren-core/core/src/mdl/function.rs | 10 +++--- wren-core/core/src/mdl/mod.rs | 10 +++--- wren-core/core/src/mdl/utils.rs | 6 ++-- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/wren-core/core/src/logical_plan/analyze/plan.rs b/wren-core/core/src/logical_plan/analyze/plan.rs index 87dab9f99..ef9857b52 100644 --- a/wren-core/core/src/logical_plan/analyze/plan.rs +++ b/wren-core/core/src/logical_plan/analyze/plan.rs @@ -20,7 +20,7 @@ use petgraph::Graph; use crate::logical_plan::analyze::RelationChain; use crate::logical_plan::analyze::RelationChain::Start; -use crate::logical_plan::utils::{from_qualified_name, map_data_type}; +use crate::logical_plan::utils::{from_qualified_name, try_map_data_type}; use crate::mdl; use crate::mdl::lineage::DatasetLink; use crate::mdl::manifest::{JoinType, Model}; @@ -227,7 +227,7 @@ impl ModelPlanNodeBuilder { Some(TableReference::bare(quoted(model.name()))), Arc::new(Field::new( column.name(), - map_data_type(&column.r#type)?, + try_map_data_type(&column.r#type)?, column.not_null, )), )); @@ -764,7 +764,7 @@ impl ModelSourceNode { Some(TableReference::bare(quoted(model.name()))), Arc::new(Field::new( column.name(), - map_data_type(&column.r#type)?, + try_map_data_type(&column.r#type)?, column.not_null, )), )); @@ -804,7 +804,7 @@ impl ModelSourceNode { Some(TableReference::bare(quoted(model.name()))), Arc::new(Field::new( column.name(), - map_data_type(&column.r#type)?, + try_map_data_type(&column.r#type)?, column.not_null, )), )); @@ -898,12 +898,12 @@ impl CalculationPlanNode { let output_field = vec![ Arc::new(Field::new( calculation.column.name(), - map_data_type(&calculation.column.r#type)?, + try_map_data_type(&calculation.column.r#type)?, calculation.column.not_null, )), Arc::new(Field::new( pk_column.name(), - map_data_type(&pk_column.r#type)?, + try_map_data_type(&pk_column.r#type)?, pk_column.not_null, )), ] diff --git a/wren-core/core/src/logical_plan/utils.rs b/wren-core/core/src/logical_plan/utils.rs index 5568298bb..e07be54af 100644 --- a/wren-core/core/src/logical_plan/utils.rs +++ b/wren-core/core/src/logical_plan/utils.rs @@ -22,6 +22,7 @@ use log::debug; use petgraph::dot::{Config, Dot}; use petgraph::Graph; use std::collections::{BTreeMap, HashSet}; +use std::str::FromStr; use std::{collections::HashMap, sync::Arc}; fn create_list_type(array_type: &str) -> Result { @@ -35,7 +36,7 @@ fn create_list_type(array_type: &str) -> Result { return plan_err!("Array type must have an element type") } ArrayElemTypeDef::AngleBracket(data_type) => { - map_data_type(&data_type.to_string())? + try_map_data_type(&data_type.to_string())? } ArrayElemTypeDef::SquareBracket(_, _) => { unreachable!() @@ -63,7 +64,7 @@ fn create_struct_type(struct_type: &str) -> Result { return plan_err!("struct must have at least one field"); } for field in fields { - let data_type = map_data_type(field.field_type.to_string().as_str())?; + let data_type = try_map_data_type(field.field_type.to_string().as_str())?; let field = Field::new( field .field_name @@ -91,9 +92,19 @@ fn parse_type(struct_type: &str) -> Result { .parse_data_type()?) } +/// Map the data type from the string to the Arrow data type +/// If the data type is not supported, it will return Utf8 +pub fn try_map_data_type(data_type: &str) -> Result { + Ok(map_data_type(data_type).ok().unwrap_or_else(|| { + debug!("can't parse data type {}, return Utf8", data_type); + DataType::Utf8 + })) +} + pub fn map_data_type(data_type: &str) -> Result { let lower = data_type.to_lowercase(); let data_type = lower.as_str(); + // TODO: try parse nested type by arrow // Currently, we don't care about the element type of the array or struct. // We only care about the array or struct itself. if data_type.starts_with("array") { @@ -146,9 +157,8 @@ pub fn map_data_type(data_type: &str) -> Result { "time" => DataType::Time32(TimeUnit::Nanosecond), // chose the smallest time unit "null" => DataType::Null, _ => { - // default to string - debug!("map unknown type {} to Utf8", data_type); - DataType::Utf8 + debug!("try parse by arrow {}", data_type); + DataType::from_str(data_type)? } }; Ok(result) @@ -163,7 +173,7 @@ pub fn create_schema(columns: Vec>) -> Result { let fields: Vec = columns .iter() .map(|column| { - let data_type = map_data_type(&column.r#type)?; + let data_type = try_map_data_type(&column.r#type)?; Ok(Field::new(&column.name, data_type, column.not_null)) }) .collect::>>()?; @@ -344,7 +354,7 @@ pub fn eliminate_ambiguous_columns(expr: Vec) -> Vec { #[cfg(test)] mod test { use crate::logical_plan::utils::{ - create_list_type, create_struct_type, map_data_type, + create_list_type, create_struct_type, try_map_data_type, }; use datafusion::arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit}; use datafusion::common::Result; @@ -411,35 +421,35 @@ mod test { ), ]; for (data_type, expected) in test_cases { - let result = map_data_type(data_type)?; + let result = try_map_data_type(data_type)?; assert_eq!(result, expected); // test case insensitivity - let result = map_data_type(&data_type.to_uppercase())?; + let result = try_map_data_type(&data_type.to_uppercase())?; assert_eq!(result, expected); } - let _ = map_data_type("array").map_err(|e| { + let _ = try_map_data_type("array").map_err(|e| { assert_eq!( e.to_string(), "SQL error: ParserError(\"Expected: <, found: EOF\")" ); }); - let _ = map_data_type("array<>").map_err(|e| { + let _ = try_map_data_type("array<>").map_err(|e| { assert_eq!( e.to_string(), "SQL error: ParserError(\"Expected: <, found: <> at Line: 1, Column: 6\")" ); }); - let _ = map_data_type("array(int64)").map_err(|e| { + let _ = try_map_data_type("array(int64)").map_err(|e| { assert_eq!( e.to_string(), "SQL error: ParserError(\"Expected: <, found: ( at Line: 1, Column: 6\")" ); }); - let _ = map_data_type("struct").map_err(|e| { + let _ = try_map_data_type("struct").map_err(|e| { assert_eq!( e.to_string(), "Error during planning: struct must have at least one field" diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 21f6c16cb..c2c57223a 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -14,6 +14,8 @@ use std::any::Any; use std::fmt::Display; use std::str::FromStr; +use crate::logical_plan::utils::map_data_type; + #[derive(Serialize, Deserialize, Debug, Clone, Hash)] pub struct RemoteFunction { pub function_type: FunctionType, @@ -43,7 +45,7 @@ impl RemoteFunction { fn transform_param_type(param_types: &[String]) -> Option> { let types = param_types .iter() - .map(|t| DataType::from_str(t.as_str()).ok()) + .map(|t| map_data_type(t.as_str()).ok()) .collect::>(); if types.iter().any(|x| x.is_none()) { return None; @@ -111,7 +113,7 @@ impl ByPassScalarUDF { impl From for ByPassScalarUDF { fn from(func: RemoteFunction) -> Self { - let return_type = DataType::from_str(func.return_type.as_str()).unwrap(); + let return_type = map_data_type(func.return_type.as_str()).unwrap(); let mut builder = DocumentationBuilder::new_with_details( DocSection::default(), func.description.clone().unwrap_or("".to_string()), @@ -185,7 +187,7 @@ impl ByPassAggregateUDF { impl From for ByPassAggregateUDF { fn from(func: RemoteFunction) -> Self { - let return_type = DataType::from_str(func.return_type.as_str()).unwrap(); + let return_type = map_data_type(func.return_type.as_str()).unwrap(); let mut builder = DocumentationBuilder::new_with_details( DocSection::default(), func.description.clone().unwrap_or("".to_string()), @@ -260,7 +262,7 @@ impl ByPassWindowFunction { impl From for ByPassWindowFunction { fn from(func: RemoteFunction) -> Self { - let return_type = DataType::from_str(func.return_type.as_str()).unwrap(); + let return_type = map_data_type(func.return_type.as_str()).unwrap(); let mut builder = DocumentationBuilder::new_with_details( DocSection::default(), func.description.clone().unwrap_or("".to_string()), diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index 37953692d..94f1e3a95 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -1,4 +1,4 @@ -use crate::logical_plan::utils::{from_qualified_name_str, map_data_type}; +use crate::logical_plan::utils::{from_qualified_name_str, try_map_data_type}; use crate::mdl::builder::ManifestBuilder; use crate::mdl::context::{create_ctx_with_mdl, WrenDataSource}; use crate::mdl::dialect::WrenDialect; @@ -219,7 +219,7 @@ impl WrenMDL { if let Some(name) = Self::collect_one_column(&expr) { Ok(Some(Field::new( alias.map(|a| a.value).unwrap_or_else(|| name.value.clone()), - map_data_type(&column.r#type)?, + try_map_data_type(&column.r#type)?, column.not_null, ))) } else { @@ -388,19 +388,19 @@ fn register_remote_function( FunctionType::Scalar => { ctx.register_udf(ScalarUDF::new_from_impl(ByPassScalarUDF::new( &remote_function.name, - map_data_type(&remote_function.return_type)?, + try_map_data_type(&remote_function.return_type)?, ))) } FunctionType::Aggregate => { ctx.register_udaf(AggregateUDF::new_from_impl(ByPassAggregateUDF::new( &remote_function.name, - map_data_type(&remote_function.return_type)?, + try_map_data_type(&remote_function.return_type)?, ))) } FunctionType::Window => { ctx.register_udwf(WindowUDF::new_from_impl(ByPassWindowFunction::new( &remote_function.name, - map_data_type(&remote_function.return_type)?, + try_map_data_type(&remote_function.return_type)?, ))) } }; diff --git a/wren-core/core/src/mdl/utils.rs b/wren-core/core/src/mdl/utils.rs index 6c205c8b9..79939afe6 100644 --- a/wren-core/core/src/mdl/utils.rs +++ b/wren-core/core/src/mdl/utils.rs @@ -13,7 +13,7 @@ use std::collections::{BTreeSet, VecDeque}; use std::ops::ControlFlow; use std::sync::Arc; -use crate::logical_plan::utils::{from_qualified_name, map_data_type}; +use crate::logical_plan::utils::{from_qualified_name, try_map_data_type}; use crate::mdl::manifest::Model; use crate::mdl::{AnalyzedWrenMDL, ColumnReference, Dataset, SessionStateRef}; @@ -212,7 +212,7 @@ pub fn quoted(s: &str) -> String { /// Transform the column to a datafusion field pub fn to_field(column: &wren_core_base::mdl::Column) -> Result { - let data_type = map_data_type(&column.r#type)?; + let data_type = try_map_data_type(&column.r#type)?; Ok(Field::new(&column.name, data_type, column.not_null)) } @@ -230,7 +230,7 @@ pub fn to_remote_field( let columns = collect_columns(expr); columns .into_iter() - .map(|c| Ok(Field::new(c.value, map_data_type(&column.r#type)?, false))) + .map(|c| Ok(Field::new(c.value, try_map_data_type(&column.r#type)?, false))) .collect::>() } else { Ok(vec![to_field(column)?]) From a994470d6f4eec1bf9738088fd3c3a808f98038f Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Thu, 27 Mar 2025 17:17:57 +0800 Subject: [PATCH 05/24] fix parameter builder and test --- wren-core-py/src/context.rs | 30 +++++++++--------------- wren-core-py/tests/test_modeling_core.py | 16 ++++++------- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/wren-core-py/src/context.rs b/wren-core-py/src/context.rs index b90743ac1..c7473634f 100644 --- a/wren-core-py/src/context.rs +++ b/wren-core-py/src/context.rs @@ -20,13 +20,14 @@ use crate::manifest::to_manifest; use crate::remote_functions::PyRemoteFunction; use log::debug; use pyo3::{pyclass, pymethods, PyErr, PyResult}; +use wren_core::datatypes::GenericStringType; use std::hash::Hash; use std::ops::ControlFlow; use std::str::FromStr; use std::sync::Arc; use std::vec; use tokio::runtime::Runtime; -use wren_core::array::{AsArray, GenericListArray}; +use wren_core::array::{AsArray, GenericByteArray}; use wren_core::ast::{visit_statements_mut, Expr, Statement, Value}; use wren_core::dialect::GenericDialect; use wren_core::mdl::context::create_ctx_with_mdl; @@ -282,8 +283,8 @@ impl PySessionContext { for row in 0..batch.num_rows() { let name = name_array.value(row).to_string(); - let param_names = Self::to_string_vec(param_names_array); - let param_types = Self::to_string_vec(param_types_array); + let param_names = Self::to_string_vec(param_names_array.value(row).as_string::()); + let param_types = Self::to_string_vec(param_types_array.value(row).as_string::()); let return_type = return_type_array.value(row).to_string(); let description = description_array.value(row).to_string(); let function_type = function_type_array.value(row).to_string(); @@ -301,22 +302,13 @@ impl PySessionContext { Ok(functions) } - fn to_string_vec(array: &GenericListArray) -> Vec { + fn to_string_vec(array: &GenericByteArray>) -> Vec { array - .iter() - .find_map(|list| { - list.map(|list| { - list.as_string::() - .iter() - .map(|s| match s { - Some(s) => s.to_string(), - None => "".to_string(), - }) - .collect::>() - }) - }) - .into_iter() - .flatten() - .collect::>() + .iter() + .map(|s| match s { + Some(s) => s.to_string(), + None => "".to_string(), + }) + .collect::>() } } diff --git a/wren-core-py/tests/test_modeling_core.py b/wren-core-py/tests/test_modeling_core.py index 8820ce62a..122743c9c 100644 --- a/wren-core-py/tests/test_modeling_core.py +++ b/wren-core-py/tests/test_modeling_core.py @@ -94,11 +94,11 @@ def test_session_context(): ) session_context = SessionContext(manifest_str, "tests/functions.csv") - sql = "SELECT add_two(c_custkey) FROM my_catalog.my_schema.customer" + sql = "SELECT add_two(c_custkey, c_custkey) FROM my_catalog.my_schema.customer" rewritten_sql = session_context.transform_sql(sql) assert ( rewritten_sql - == "SELECT add_two(customer.c_custkey) FROM (SELECT customer.c_custkey FROM (SELECT __source.c_custkey AS c_custkey FROM main.customer AS __source) AS customer) AS customer" + == "SELECT add_two(customer.c_custkey, customer.c_custkey) FROM (SELECT customer.c_custkey FROM (SELECT __source.c_custkey AS c_custkey FROM main.customer AS __source) AS customer) AS customer" ) @@ -109,11 +109,11 @@ def test_read_function_list(): assert len(functions) == 25943 rewritten_sql = session_context.transform_sql( - "SELECT add_two(c_custkey) FROM my_catalog.my_schema.customer" + "SELECT add_two(c_custkey, c_custkey) FROM my_catalog.my_schema.customer" ) assert ( rewritten_sql - == "SELECT add_two(customer.c_custkey) FROM (SELECT customer.c_custkey FROM (SELECT __source.c_custkey AS c_custkey FROM main.customer AS __source) AS customer) AS customer" + == "SELECT add_two(customer.c_custkey, customer.c_custkey) FROM (SELECT customer.c_custkey FROM (SELECT __source.c_custkey AS c_custkey FROM main.customer AS __source) AS customer) AS customer" ) session_context = SessionContext(manifest_str, None) @@ -128,15 +128,15 @@ def test_get_available_functions(): assert add_two.name == "add_two" assert add_two.function_type == "scalar" assert add_two.description == "Adds two numbers together." - assert add_two.return_type == "int" + assert add_two.return_type == "Int32" assert add_two.param_names == "f1,f2" - assert add_two.param_types == "int,int" + assert add_two.param_types == "Int32,Int32" max_if = next(f for f in functions if f.name == "max_if") assert max_if.name == "max_if" assert max_if.function_type == "window" - assert max_if.param_names is None - assert max_if.param_types is None + assert max_if.param_names is "" + assert max_if.param_types is "" @pytest.mark.parametrize( From 242bd79753480520a4aec54c6e493d71570fa624 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Thu, 27 Mar 2025 17:18:26 +0800 Subject: [PATCH 06/24] fix fmt and update lock --- wren-core-py/poetry.lock | 20 ++++++-------------- wren-core-py/src/context.rs | 20 +++++++++++--------- wren-core-py/tests/test_modeling_core.py | 4 ++-- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/wren-core-py/poetry.lock b/wren-core-py/poetry.lock index 80c47776f..f044fc303 100644 --- a/wren-core-py/poetry.lock +++ b/wren-core-py/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "colorama" @@ -6,8 +6,6 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev"] -markers = "sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -15,14 +13,13 @@ files = [ [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false -python-versions = ">=3.7" -groups = ["dev"] +python-versions = ">=3.8" files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] [[package]] @@ -31,7 +28,6 @@ version = "1.8.3" description = "Build and publish crates with pyo3, cffi and uniffi bindings as well as rust binaries as python packages" optional = false python-versions = ">=3.7" -groups = ["main"] files = [ {file = "maturin-1.8.3-py3-none-linux_armv6l.whl", hash = "sha256:fa27466b627150123729b2e611f9f9cfade84d24385d72c6877f78c30de30e89"}, {file = "maturin-1.8.3-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:583404d20d7f1d9c8f3c18dcab9014faacabbed6be02da80062c06cd0e279554"}, @@ -58,7 +54,6 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -70,7 +65,6 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -86,7 +80,6 @@ version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" -groups = ["dev"] files = [ {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, @@ -107,7 +100,6 @@ version = "0.11.2" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" -groups = ["dev"] files = [ {file = "ruff-0.11.2-py3-none-linux_armv6l.whl", hash = "sha256:c69e20ea49e973f3afec2c06376eb56045709f0212615c1adb0eda35e8a4e477"}, {file = "ruff-0.11.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2c5424cc1c4eb1d8ecabe6d4f1b70470b4f24a0c0171356290b1953ad8f0e272"}, @@ -130,6 +122,6 @@ files = [ ] [metadata] -lock-version = "2.1" +lock-version = "2.0" python-versions = ">=3.11,<3.12" content-hash = "753864ffa4ae57df3096009665634308586b372b044df2bd670f548192244721" diff --git a/wren-core-py/src/context.rs b/wren-core-py/src/context.rs index c7473634f..c946fca83 100644 --- a/wren-core-py/src/context.rs +++ b/wren-core-py/src/context.rs @@ -20,7 +20,6 @@ use crate::manifest::to_manifest; use crate::remote_functions::PyRemoteFunction; use log::debug; use pyo3::{pyclass, pymethods, PyErr, PyResult}; -use wren_core::datatypes::GenericStringType; use std::hash::Hash; use std::ops::ControlFlow; use std::str::FromStr; @@ -29,6 +28,7 @@ use std::vec; use tokio::runtime::Runtime; use wren_core::array::{AsArray, GenericByteArray}; use wren_core::ast::{visit_statements_mut, Expr, Statement, Value}; +use wren_core::datatypes::GenericStringType; use wren_core::dialect::GenericDialect; use wren_core::mdl::context::create_ctx_with_mdl; use wren_core::mdl::function::{ @@ -283,8 +283,10 @@ impl PySessionContext { for row in 0..batch.num_rows() { let name = name_array.value(row).to_string(); - let param_names = Self::to_string_vec(param_names_array.value(row).as_string::()); - let param_types = Self::to_string_vec(param_types_array.value(row).as_string::()); + let param_names = + Self::to_string_vec(param_names_array.value(row).as_string::()); + let param_types = + Self::to_string_vec(param_types_array.value(row).as_string::()); let return_type = return_type_array.value(row).to_string(); let description = description_array.value(row).to_string(); let function_type = function_type_array.value(row).to_string(); @@ -304,11 +306,11 @@ impl PySessionContext { fn to_string_vec(array: &GenericByteArray>) -> Vec { array - .iter() - .map(|s| match s { - Some(s) => s.to_string(), - None => "".to_string(), - }) - .collect::>() + .iter() + .map(|s| match s { + Some(s) => s.to_string(), + None => "".to_string(), + }) + .collect::>() } } diff --git a/wren-core-py/tests/test_modeling_core.py b/wren-core-py/tests/test_modeling_core.py index 122743c9c..afafce7b6 100644 --- a/wren-core-py/tests/test_modeling_core.py +++ b/wren-core-py/tests/test_modeling_core.py @@ -135,8 +135,8 @@ def test_get_available_functions(): max_if = next(f for f in functions if f.name == "max_if") assert max_if.name == "max_if" assert max_if.function_type == "window" - assert max_if.param_names is "" - assert max_if.param_types is "" + assert max_if.param_names == "" + assert max_if.param_types == "" @pytest.mark.parametrize( From c23aa8231263d3e5cd692fd227027dbc4ca53db3 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Thu, 27 Mar 2025 17:49:18 +0800 Subject: [PATCH 07/24] disable test_functions case --- .../v3/connector/bigquery/test_functions.py | 31 ------------------- .../v3/connector/clickhouse/test_functions.py | 17 ---------- .../v3/connector/mssql/test_functions.py | 16 ---------- .../v3/connector/mysql/test_functions.py | 16 ---------- .../v3/connector/postgres/test_functions.py | 16 ---------- .../v3/connector/trino/test_functions.py | 16 ---------- 6 files changed, 112 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py index f308f177f..4596c860e 100644 --- a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py @@ -115,34 +115,3 @@ async def test_aggregate_function(client, manifest_str: str, connection_info): "data": [[1]], "dtypes": {"col": "int64"}, } - - -async def test_functions(client, manifest_str: str, connection_info): - csv_parser = FunctionCsvParser(os.path.join(function_list_path, "bigquery.csv")) - sql_generator = SqlTestGenerator("bigquery") - for function in csv_parser.parse(): - # Skip window functions util https://github.com/Canner/wren-engine/issues/924 is resolved - if function.function_type == "window": - continue - # Skip functions with interval util https://github.com/Canner/wren-engine/issues/930 is resolved - if function.name in ( - "date_add", - "date_sub", - "date_diff", - "date_trunc", - "timestamp_add", - "timestamp_sub", - "timestamp_diff", - "timestamp_trunc", - ): - continue - sql = sql_generator.generate_sql(function) - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": sql, - }, - ) - assert response.status_code == 200 diff --git a/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py b/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py index 5fc107c06..3b50a5e59 100644 --- a/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py @@ -107,20 +107,3 @@ async def test_aggregate_function(client, manifest_str: str, connection_info): "data": [[1]], "dtypes": {"col": "uint64"}, } - - -async def test_functions(client, manifest_str: str, connection_info): - csv_path = os.path.join(function_list_path, "clickhouse.csv") - csv_parser = FunctionCsvParser(csv_path) - sql_generator = SqlTestGenerator("clickhouse") - for function in csv_parser.parse(): - sql = sql_generator.generate_sql(function) - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": sql, - }, - ) - assert response.status_code == 200 diff --git a/ibis-server/tests/routers/v3/connector/mssql/test_functions.py b/ibis-server/tests/routers/v3/connector/mssql/test_functions.py index 522ea6b6b..8e9716a0b 100644 --- a/ibis-server/tests/routers/v3/connector/mssql/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/mssql/test_functions.py @@ -107,19 +107,3 @@ async def test_aggregate_function(client, manifest_str: str, connection_info): "data": [[1]], "dtypes": {"col": "int64"}, } - - -async def test_functions(client, manifest_str: str, connection_info): - csv_parser = FunctionCsvParser(os.path.join(function_list_path, "mssql.csv")) - sql_generator = SqlTestGenerator("mssql") - for function in csv_parser.parse(): - sql = sql_generator.generate_sql(function) - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": sql, - }, - ) - assert response.status_code == 200 diff --git a/ibis-server/tests/routers/v3/connector/mysql/test_functions.py b/ibis-server/tests/routers/v3/connector/mysql/test_functions.py index 884735525..adfbc27d2 100644 --- a/ibis-server/tests/routers/v3/connector/mysql/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/mysql/test_functions.py @@ -108,19 +108,3 @@ async def test_aggregate_function(client, manifest_str: str, connection_info): "data": [[1]], "dtypes": {"col": "int64"}, } - - -async def test_functions(client, manifest_str: str, connection_info): - csv_parser = FunctionCsvParser(os.path.join(function_list_path, "mysql.csv")) - sql_generator = SqlTestGenerator("mysql") - for function in csv_parser.parse(): - sql = sql_generator.generate_sql(function) - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": sql, - }, - ) - assert response.status_code == 200 diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py index 5cca621b7..ddb73954f 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py @@ -109,19 +109,3 @@ async def test_aggregate_function(client, manifest_str: str, connection_info): "data": [[1]], "dtypes": {"col": "int64"}, } - - -async def test_functions(client, manifest_str: str, connection_info): - csv_parser = FunctionCsvParser(os.path.join(function_list_path, "postgres.csv")) - sql_generator = SqlTestGenerator("postgres") - for function in csv_parser.parse(): - sql = sql_generator.generate_sql(function) - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": sql, - }, - ) - assert response.status_code == 200 diff --git a/ibis-server/tests/routers/v3/connector/trino/test_functions.py b/ibis-server/tests/routers/v3/connector/trino/test_functions.py index a2fc1b466..530e62ec3 100644 --- a/ibis-server/tests/routers/v3/connector/trino/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/trino/test_functions.py @@ -107,19 +107,3 @@ async def test_aggregate_function(client, manifest_str: str, connection_info): "data": [[1]], "dtypes": {"col": "int64"}, } - - -async def test_functions(client, manifest_str: str, connection_info): - csv_parser = FunctionCsvParser(os.path.join(function_list_path, "trino.csv")) - sql_generator = SqlTestGenerator("trino") - for function in csv_parser.parse(): - sql = sql_generator.generate_sql(function) - response = await client.post( - url=f"{base_url}/query", - json={ - "connectionInfo": connection_info, - "manifestStr": manifest_str, - "sql": sql, - }, - ) - assert response.status_code == 200 From 26831108d6c438f42d467931592eaadcc0a9f963 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 01:23:11 +0800 Subject: [PATCH 08/24] fix parameter and test --- .../resources/function_list/postgres.csv | 10 +++--- ibis-server/tests/conftest.py | 2 +- .../v3/connector/postgres/test_functions.py | 6 ++-- wren-core-py/src/remote_functions.rs | 36 ++++++++++++++++--- wren-core-py/tests/functions.csv | 2 ++ wren-core-py/tests/test_modeling_core.py | 11 +++++- wren-core/core/src/logical_plan/utils.rs | 2 ++ 7 files changed, 55 insertions(+), 14 deletions(-) diff --git a/ibis-server/resources/function_list/postgres.csv b/ibis-server/resources/function_list/postgres.csv index 75f932122..fc0ce371f 100644 --- a/ibis-server/resources/function_list/postgres.csv +++ b/ibis-server/resources/function_list/postgres.csv @@ -11,16 +11,16 @@ scalar,convert_from,text,,"bytea,text","Convert from encoding" scalar,convert_to,bytea,,"text,text","Convert to encoding" scalar,extract,numeric,,"text,timestamp","Get subfield from date/time" scalar,format,text,,"text,array","Format string" -scalar,greatest,same as arg types,,array,"Greatest of arguments" +scalar,greatest,any,,array,"Greatest of arguments" scalar,host,text,,inet,"Extract host from IP address" scalar,isfinite,boolean,,timestamp,"Test for finite date/timestamp/interval" scalar,json_array_length,int,,json,"Length of JSON array" scalar,json_extract_path,json,,"json,array","Get JSON object at path" -scalar,json_object_keys,setof text,,json,"Get JSON object keys" +scalar,json_object_keys,array,,json,"Get JSON object keys" scalar,jsonb_array_length,int,,jsonb,"Length of JSONB array" scalar,jsonb_extract_path,jsonb,,"jsonb,array","Get JSONB object at path" -scalar,jsonb_object_keys,setof text,,jsonb,"Get JSONB object keys" -scalar,least,same as arg types,,array,"Least of arguments" +scalar,jsonb_object_keys,array,,jsonb,"Get JSONB object keys" +scalar,least,any,,array,"Least of arguments" scalar,mod,numeric,,"numeric,numeric","Modulo (remainder)" scalar,parse_ident,array,,"text,boolean","Parse qualified identifier" scalar,pg_client_encoding,name,,,"Current client encoding" @@ -30,7 +30,7 @@ scalar,quote_ident,text,,text,"Quote identifier" scalar,quote_literal,text,,any,"Quote literal" scalar,quote_nullable,text,,any,"Quote nullable" scalar,regexp_split_to_array,array,,"text,text","Split string by pattern" -scalar,regexp_split_to_table,setof text,,"text,text","Split string by pattern" +scalar,regexp_split_to_table,array,,"text,text","Split string by pattern" scalar,sign,numeric,,numeric,"Sign of number" scalar,to_json,json,,boolean,"Convert to JSON" scalar,to_number,numeric,,"text,text","Convert string to number" diff --git a/ibis-server/tests/conftest.py b/ibis-server/tests/conftest.py index 8332f040e..ece69a1f0 100644 --- a/ibis-server/tests/conftest.py +++ b/ibis-server/tests/conftest.py @@ -11,7 +11,7 @@ def file_path(path: str) -> str: return os.path.join(os.path.dirname(__file__), path) -DATAFUSION_FUNCTION_COUNT = 273 +DATAFUSION_FUNCTION_COUNT = 25941 @pytest.fixture(scope="session") diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py index ddb73954f..2f92b95af 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py @@ -63,9 +63,9 @@ async def test_function_list(client): "name": "extract", "description": "Get subfield from date/time", "function_type": "scalar", - "param_names": None, - "param_types": "text,timestamp", - "return_type": "numeric", + "param_names": '', + "param_types": "Utf8,Timestamp(Nanosecond, None)", + "return_type": "Decimal128(38, 10)", } config.set_remote_function_list_path(None) diff --git a/wren-core-py/src/remote_functions.rs b/wren-core-py/src/remote_functions.rs index c090f3b58..0923d4a29 100644 --- a/wren-core-py/src/remote_functions.rs +++ b/wren-core-py/src/remote_functions.rs @@ -66,14 +66,28 @@ impl From for PyRemoteFunction { let param_names = remote_function.param_names.map(|names| { names .iter() - .map(|name| name.to_string()) + .filter_map(|name| { + if name.is_empty() { + None + } + else { + Some(name.to_string()) + } + }) .collect::>() .join(",") }); let param_types = remote_function.param_types.map(|types| { types .iter() - .map(|t| t.to_string()) + .filter_map(|t| { + if t.is_empty() { + None + } + else { + Some(t.to_string()) + } + }) .collect::>() .join(",") }); @@ -95,13 +109,27 @@ impl From for wren_core::mdl::function::RemoteFunction { let param_names = remote_function.param_names.map(|names| { names .split(",") - .map(|name| name.to_string()) + .filter_map(|name| { + if name.is_empty() { + None + } + else { + Some(name.to_string()) + } + }) .collect::>() }); let param_types = remote_function.param_types.map(|types| { types .split(",") - .map(|t| t.to_string()) + .filter_map(|t| { + if t.is_empty() { + None + } + else { + Some(t.to_string()) + } + }) .collect::>() }); wren_core::mdl::function::RemoteFunction { diff --git a/wren-core-py/tests/functions.csv b/wren-core-py/tests/functions.csv index cf9464767..9b6110cd5 100644 --- a/wren-core-py/tests/functions.csv +++ b/wren-core-py/tests/functions.csv @@ -1,3 +1,5 @@ function_type,name,return_type,param_names,param_types,description scalar,add_two,int,"f1,f2","int,int","Adds two numbers together." window,max_if,int,,,"If the condition is true, returns the maximum value in the window." +scalar,json_extract,jsonb,"input,pattern","jsonb,text","Extracts a value from a JSON object." +scalar,add_custom,int,,"int,int","Adds two numbers together." diff --git a/wren-core-py/tests/test_modeling_core.py b/wren-core-py/tests/test_modeling_core.py index afafce7b6..e3382b255 100644 --- a/wren-core-py/tests/test_modeling_core.py +++ b/wren-core-py/tests/test_modeling_core.py @@ -106,7 +106,7 @@ def test_read_function_list(): path = "tests/functions.csv" session_context = SessionContext(manifest_str, path) functions = session_context.get_available_functions() - assert len(functions) == 25943 + assert len(functions) == 25945 rewritten_sql = session_context.transform_sql( "SELECT add_two(c_custkey, c_custkey) FROM my_catalog.my_schema.customer" @@ -138,6 +138,15 @@ def test_get_available_functions(): assert max_if.param_names == "" assert max_if.param_types == "" + add_two = next(f for f in functions if f.name == "add_custom") + assert add_two.name == "add_custom" + assert add_two.function_type == "scalar" + assert add_two.description == "Adds two numbers together." + assert add_two.return_type == "Int32" + assert add_two.param_names == "" + assert add_two.param_types == "Int32,Int32" + + @pytest.mark.parametrize( ("value", "expected_error", "error_message"), diff --git a/wren-core/core/src/logical_plan/utils.rs b/wren-core/core/src/logical_plan/utils.rs index e07be54af..b3facbe8b 100644 --- a/wren-core/core/src/logical_plan/utils.rs +++ b/wren-core/core/src/logical_plan/utils.rs @@ -144,6 +144,8 @@ pub fn map_data_type(data_type: &str) -> Result { "date" => DataType::Date32, "interval" => DataType::Interval(IntervalUnit::DayTime), "json" => DataType::Utf8, // we don't have a JSON type, so we map it to Utf8 + "xml" => DataType::Utf8, // we don't have a XML type, so we map it to Utf8 + "jsonb" => DataType::Binary, // we don't have a JSONB type, so we map it to Binary "oid" => DataType::Int32, "bytea" => DataType::Binary, "uuid" => DataType::Utf8, // we don't have a UUID type, so we map it to Utf8 From fe194f7af5d0fcb5800db6157407cc7115e3d9ba Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 14:34:08 +0800 Subject: [PATCH 09/24] refactor the by pass function --- wren-core-py/src/context.rs | 11 +- wren-core-py/src/remote_functions.rs | 70 ++--- wren-core-py/tests/functions.csv | 2 + wren-core-py/tests/test_modeling_core.py | 37 ++- wren-core/core/src/logical_plan/utils.rs | 2 +- wren-core/core/src/mdl/function.rs | 378 ++++++++++++++++++----- wren-core/core/src/mdl/utils.rs | 8 +- 7 files changed, 378 insertions(+), 130 deletions(-) diff --git a/wren-core-py/src/context.rs b/wren-core-py/src/context.rs index c946fca83..45cfc0023 100644 --- a/wren-core-py/src/context.rs +++ b/wren-core-py/src/context.rs @@ -304,13 +304,12 @@ impl PySessionContext { Ok(functions) } - fn to_string_vec(array: &GenericByteArray>) -> Vec { + fn to_string_vec( + array: &GenericByteArray>, + ) -> Vec> { array .iter() - .map(|s| match s { - Some(s) => s.to_string(), - None => "".to_string(), - }) - .collect::>() + .map(|s| s.map(|s| s.to_string())) + .collect::>>() } } diff --git a/wren-core-py/src/remote_functions.rs b/wren-core-py/src/remote_functions.rs index 0923d4a29..acda46a92 100644 --- a/wren-core-py/src/remote_functions.rs +++ b/wren-core-py/src/remote_functions.rs @@ -63,38 +63,35 @@ impl PyRemoteFunction { impl From for PyRemoteFunction { fn from(remote_function: wren_core::mdl::function::RemoteFunction) -> Self { - let param_names = remote_function.param_names.map(|names| { - names - .iter() - .filter_map(|name| { - if name.is_empty() { - None - } - else { - Some(name.to_string()) - } - }) - .collect::>() - .join(",") - }); - let param_types = remote_function.param_types.map(|types| { - types - .iter() - .filter_map(|t| { - if t.is_empty() { - None - } - else { - Some(t.to_string()) - } - }) - .collect::>() - .join(",") - }); + let param_names = remote_function + .param_names + .map(|names| { + names + .into_iter() + .flatten() + .collect::>() + .join(",") + }) + .and_then(|types| if types.is_empty() { None } else { Some(types) }); + let param_types = remote_function + .param_types + .map(|types| { + types + .into_iter() + .flatten() + .collect::>() + .join(",") + }) + .and_then(|types| if types.is_empty() { None } else { Some(types) }); + let return_type = if remote_function.return_type.is_empty() { + None + } else { + Some(remote_function.return_type) + }; Self { function_type: remote_function.function_type.to_string(), name: remote_function.name, - return_type: Some(remote_function.return_type), + return_type, param_names, param_types, description: remote_function.description, @@ -109,34 +106,31 @@ impl From for wren_core::mdl::function::RemoteFunction { let param_names = remote_function.param_names.map(|names| { names .split(",") - .filter_map(|name| { + .map(|name| { if name.is_empty() { None - } - else { + } else { Some(name.to_string()) } }) - .collect::>() + .collect::>>() }); let param_types = remote_function.param_types.map(|types| { types .split(",") - .filter_map(|t| { + .map(|t| { if t.is_empty() { None - } - else { + } else { Some(t.to_string()) } }) - .collect::>() + .collect::>>() }); wren_core::mdl::function::RemoteFunction { function_type: FunctionType::from_str(&remote_function.function_type) .unwrap(), name: remote_function.name, - // TODO: Get the return type form DataFusion SessionState return_type: remote_function.return_type.unwrap_or("string".to_string()), param_names, param_types, diff --git a/wren-core-py/tests/functions.csv b/wren-core-py/tests/functions.csv index 9b6110cd5..a2691eb86 100644 --- a/wren-core-py/tests/functions.csv +++ b/wren-core-py/tests/functions.csv @@ -3,3 +3,5 @@ scalar,add_two,int,"f1,f2","int,int","Adds two numbers together." window,max_if,int,,,"If the condition is true, returns the maximum value in the window." scalar,json_extract,jsonb,"input,pattern","jsonb,text","Extracts a value from a JSON object." scalar,add_custom,int,,"int,int","Adds two numbers together." +scalar,test_same_as_input_array,same_as_input_first_array_element,,,"Returns the greatest value from the first array." +scalar,test_return_type,same_as_input,,,"Returns the same type as the input." diff --git a/wren-core-py/tests/test_modeling_core.py b/wren-core-py/tests/test_modeling_core.py index e3382b255..bdf62d098 100644 --- a/wren-core-py/tests/test_modeling_core.py +++ b/wren-core-py/tests/test_modeling_core.py @@ -106,7 +106,7 @@ def test_read_function_list(): path = "tests/functions.csv" session_context = SessionContext(manifest_str, path) functions = session_context.get_available_functions() - assert len(functions) == 25945 + assert len(functions) == 25947 rewritten_sql = session_context.transform_sql( "SELECT add_two(c_custkey, c_custkey) FROM my_catalog.my_schema.customer" @@ -135,16 +135,33 @@ def test_get_available_functions(): max_if = next(f for f in functions if f.name == "max_if") assert max_if.name == "max_if" assert max_if.function_type == "window" - assert max_if.param_names == "" - assert max_if.param_types == "" + assert max_if.param_names == None + assert max_if.param_types == None - add_two = next(f for f in functions if f.name == "add_custom") - assert add_two.name == "add_custom" - assert add_two.function_type == "scalar" - assert add_two.description == "Adds two numbers together." - assert add_two.return_type == "Int32" - assert add_two.param_names == "" - assert add_two.param_types == "Int32,Int32" + func = next(f for f in functions if f.name == "add_custom") + assert func.name == "add_custom" + assert func.function_type == "scalar" + assert func.description == "Adds two numbers together." + assert func.return_type == "Int32" + assert func.param_names == None + assert func.param_types == "Int32,Int32" + + func = next(f for f in functions if f.name == "test_same_as_input_array") + assert func.name == "test_same_as_input_array" + assert func.function_type == "scalar" + assert func.description == "Returns the greatest value from the first array." + assert func.return_type == None + assert func.param_names == None + assert func.param_types == None + + + func = next(f for f in functions if f.name == "test_return_type") + assert func.name == "test_return_type" + assert func.function_type == "scalar" + assert func.description == "Returns the same type as the input." + assert func.return_type == None + assert func.param_names == None + assert func.param_types == None diff --git a/wren-core/core/src/logical_plan/utils.rs b/wren-core/core/src/logical_plan/utils.rs index b3facbe8b..3ac8c03e3 100644 --- a/wren-core/core/src/logical_plan/utils.rs +++ b/wren-core/core/src/logical_plan/utils.rs @@ -144,7 +144,7 @@ pub fn map_data_type(data_type: &str) -> Result { "date" => DataType::Date32, "interval" => DataType::Interval(IntervalUnit::DayTime), "json" => DataType::Utf8, // we don't have a JSON type, so we map it to Utf8 - "xml" => DataType::Utf8, // we don't have a XML type, so we map it to Utf8 + "xml" => DataType::Utf8, // we don't have a XML type, so we map it to Utf8 "jsonb" => DataType::Binary, // we don't have a JSONB type, so we map it to Binary "oid" => DataType::Int32, "bytea" => DataType::Binary, diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index c2c57223a..942d57a29 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -1,6 +1,6 @@ use datafusion::arrow::datatypes::{DataType, Field}; -use datafusion::common::internal_err; use datafusion::common::Result; +use datafusion::common::{internal_err, not_impl_err}; use datafusion::logical_expr::function::{ AccumulatorArgs, PartitionEvaluatorArgs, WindowUDFFieldArgs, }; @@ -21,8 +21,8 @@ pub struct RemoteFunction { pub function_type: FunctionType, pub name: String, pub return_type: String, - pub param_names: Option>, - pub param_types: Option>, + pub param_names: Option>>, + pub param_types: Option>>, pub description: Option, } @@ -42,15 +42,20 @@ impl RemoteFunction { Signature::one_of(signatures, Volatility::Volatile) } - fn transform_param_type(param_types: &[String]) -> Option> { + fn transform_param_type(param_types: &[Option]) -> Option> { let types = param_types .iter() - .map(|t| map_data_type(t.as_str()).ok()) + .map(|t| t.clone().map(|x| map_data_type(x.as_str()).ok())) .collect::>(); if types.iter().any(|x| x.is_none()) { return None; } - Some(types.into_iter().map(|x| x.unwrap().clone()).collect()) + Some( + types + .into_iter() + .map(|x| x.flatten().unwrap().clone()) + .collect(), + ) } } @@ -86,13 +91,77 @@ impl FromStr for FunctionType { } } +/// The return type of the function. +/// It can be a specific data type, the same as the input type, or the same as the input array element type. +/// +/// The return type is used to generate the logical plan and unparsed them to SQL. +/// It should not be used to check the return type of the function execution. +#[derive(Debug)] +pub enum ReturnType { + /// The return type is a specific data type + Specific(DataType), + /// The return type is the same as the input type + SameAsInput, + /// If the input type is array, the return type is the same as the element type of the first array argument + /// e.g. `greatest(array)` will return `int` + SameAsInputFirstArrayElement, +} + +impl Display for ReturnType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ReturnType::Specific(data_type) => write!(f, "{}", data_type), + ReturnType::SameAsInput => write!(f, "same_as_input"), + ReturnType::SameAsInputFirstArrayElement => { + write!(f, "same_as_input_first_array_element") + } + } + } +} +impl FromStr for ReturnType { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "same_as_input" => Ok(ReturnType::SameAsInput), + "same_as_input_first_array_element" => { + Ok(ReturnType::SameAsInputFirstArrayElement) + } + _ => map_data_type(s) + .map(ReturnType::Specific) + .map_err(|e| e.to_string()), + } + } +} + +impl ReturnType { + pub fn to_data_type(&self, arg_types: &[DataType]) -> Result { + Ok(match self { + ReturnType::Specific(data_type) => data_type.clone(), + ReturnType::SameAsInput => { + arg_types.first().cloned().unwrap_or(DataType::Null) + } + ReturnType::SameAsInputFirstArrayElement => { + if arg_types.is_empty() { + return not_impl_err!("No input type"); + } + if let DataType::List(field) = &arg_types[0] { + field.data_type().clone() + } else { + return not_impl_err!("Input type is not array"); + } + } + }) + } +} + /// A scalar UDF that will be bypassed when planning logical plan. /// This is used to register the remote function to the context. The function should not be /// invoked by DataFusion. It's only used to generate the logical plan and unparsed them to SQL. #[derive(Debug)] pub struct ByPassScalarUDF { name: String, - return_type: DataType, + return_type: ReturnType, signature: Signature, doc: Option, } @@ -101,7 +170,7 @@ impl ByPassScalarUDF { pub fn new(name: &str, return_type: DataType) -> Self { Self { name: name.to_string(), - return_type, + return_type: ReturnType::Specific(return_type), signature: Signature::one_of( vec![TypeSignature::Nullary, TypeSignature::VariadicAny], Volatility::Volatile, @@ -113,26 +182,35 @@ impl ByPassScalarUDF { impl From for ByPassScalarUDF { fn from(func: RemoteFunction) -> Self { - let return_type = map_data_type(func.return_type.as_str()).unwrap(); - let mut builder = DocumentationBuilder::new_with_details( - DocSection::default(), - func.description.clone().unwrap_or("".to_string()), - "", - ); - let signature = func.get_signature(); - if let Some(param_names) = func.param_names.as_ref() { - for (i, name) in param_names.iter().enumerate() { - builder = builder - .with_argument(name, func.param_types.as_ref().unwrap()[i].as_str()); - } - } + // just panic if the return type is not valid to avoid we input invalid type + let return_type = ReturnType::from_str(&func.return_type).unwrap(); ByPassScalarUDF { - name: func.name, return_type, - signature, - doc: Some(builder.build()), + signature: func.get_signature(), + doc: Some(build_document(&func)), + name: func.name, + } + } +} + +fn build_document(func: &RemoteFunction) -> Documentation { + let mut builder = DocumentationBuilder::new_with_details( + DocSection::default(), + func.description.clone().unwrap_or("".to_string()), + "", + ); + if let Some(param_names) = func.param_names.as_ref() { + for (i, name) in param_names.iter().enumerate() { + let description = func + .param_types + .as_ref() + .map(|types| types[i].clone().unwrap_or("".to_string())) + .unwrap_or("".to_string()); + builder = builder + .with_argument(name.clone().unwrap_or("".to_string()), description); } } + builder.build() } impl ScalarUDFImpl for ByPassScalarUDF { @@ -148,8 +226,8 @@ impl ScalarUDFImpl for ByPassScalarUDF { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(self.return_type.clone()) + fn return_type(&self, arg_types: &[DataType]) -> Result { + self.return_type.to_data_type(arg_types) } fn invoke(&self, _args: &[ColumnarValue]) -> Result { @@ -166,7 +244,7 @@ impl ScalarUDFImpl for ByPassScalarUDF { #[derive(Debug)] pub struct ByPassAggregateUDF { name: String, - return_type: DataType, + return_type: ReturnType, signature: Signature, doc: Option, } @@ -175,7 +253,7 @@ impl ByPassAggregateUDF { pub fn new(name: &str, return_type: DataType) -> Self { Self { name: name.to_string(), - return_type, + return_type: ReturnType::Specific(return_type), signature: Signature::one_of( vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Volatile, @@ -187,25 +265,13 @@ impl ByPassAggregateUDF { impl From for ByPassAggregateUDF { fn from(func: RemoteFunction) -> Self { - let return_type = map_data_type(func.return_type.as_str()).unwrap(); - let mut builder = DocumentationBuilder::new_with_details( - DocSection::default(), - func.description.clone().unwrap_or("".to_string()), - "", - ); - let signature = func.get_signature(); - if let Some(param_names) = func.param_names.as_ref() { - for (i, name) in param_names.iter().enumerate() { - builder = builder - .with_argument(name, func.param_types.as_ref().unwrap()[i].as_str()); - } - } - + // just panic if the return type is not valid to avoid we input invalid type + let return_type = ReturnType::from_str(&func.return_type).unwrap(); ByPassAggregateUDF { - name: func.name, return_type, - signature, - doc: Some(builder.build()), + signature: func.get_signature(), + doc: Some(build_document(&func)), + name: func.name, } } } @@ -223,8 +289,8 @@ impl AggregateUDFImpl for ByPassAggregateUDF { &self.signature } - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(self.return_type.clone()) + fn return_type(&self, arg_types: &[DataType]) -> Result { + self.return_type.to_data_type(arg_types) } fn accumulator(&self, _acc_args: AccumulatorArgs) -> Result> { @@ -241,7 +307,7 @@ impl AggregateUDFImpl for ByPassAggregateUDF { #[derive(Debug)] pub struct ByPassWindowFunction { name: String, - return_type: DataType, + return_type: ReturnType, signature: Signature, doc: Option, } @@ -250,7 +316,7 @@ impl ByPassWindowFunction { pub fn new(name: &str, return_type: DataType) -> Self { Self { name: name.to_string(), - return_type, + return_type: ReturnType::Specific(return_type), signature: Signature::one_of( vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Volatile, @@ -262,25 +328,13 @@ impl ByPassWindowFunction { impl From for ByPassWindowFunction { fn from(func: RemoteFunction) -> Self { - let return_type = map_data_type(func.return_type.as_str()).unwrap(); - let mut builder = DocumentationBuilder::new_with_details( - DocSection::default(), - func.description.clone().unwrap_or("".to_string()), - "", - ); - let signature = func.get_signature(); - if let Some(param_names) = func.param_names.as_ref() { - for (i, name) in param_names.iter().enumerate() { - builder = builder - .with_argument(name, func.param_types.as_ref().unwrap()[i].as_str()); - } - } - + // just panic if the return type is not valid to avoid we input invalid type + let return_type = ReturnType::from_str(&func.return_type).unwrap(); ByPassWindowFunction { - name: func.name, return_type, - signature, - doc: Some(builder.build()), + signature: func.get_signature(), + doc: Some(build_document(&func)), + name: func.name, } } } @@ -306,11 +360,8 @@ impl WindowUDFImpl for ByPassWindowFunction { } fn field(&self, field_args: WindowUDFFieldArgs) -> Result { - Ok(Field::new( - field_args.name(), - self.return_type.clone(), - false, - )) + let return_type = self.return_type.to_data_type(field_args.input_types())?; + Ok(Field::new(field_args.name(), return_type, false)) } fn documentation(&self) -> Option<&Documentation> { @@ -320,12 +371,16 @@ impl WindowUDFImpl for ByPassWindowFunction { #[cfg(test)] mod test { + use std::sync::Arc; + use crate::mdl::function::{ - ByPassAggregateUDF, ByPassScalarUDF, ByPassWindowFunction, + ByPassAggregateUDF, ByPassScalarUDF, ByPassWindowFunction, FunctionType, + RemoteFunction, }; - use datafusion::arrow::datatypes::DataType; + use datafusion::arrow::datatypes::{DataType, Field}; use datafusion::common::Result; - use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF}; + use datafusion::logical_expr::TypeSignature; + use datafusion::logical_expr::{AggregateUDF, ScalarUDF, ScalarUDFImpl, WindowUDF}; use datafusion::prelude::SessionContext; #[tokio::test] @@ -413,4 +468,179 @@ mod test { Ok(()) } + + #[tokio::test] + async fn test_remote_function_to_bypass_func() -> Result<()> { + // full information + let remote_function = RemoteFunction { + function_type: FunctionType::Scalar, + name: "test".to_string(), + return_type: "string".to_string(), + param_names: Some(vec![Some("a".to_string()), Some("b".to_string())]), + param_types: Some(vec![Some("int".to_string()), Some("string".to_string())]), + description: Some("test function".to_string()), + }; + let udf = ByPassScalarUDF::from(remote_function); + assert_eq!(udf.name, "test"); + assert_eq!( + udf.return_type.to_data_type(&[DataType::Int64]).unwrap(), + DataType::Utf8 + ); + assert_eq!( + udf.signature.type_signature, + TypeSignature::OneOf(vec![TypeSignature::Exact(vec![ + DataType::Int32, + DataType::Utf8 + ])]) + ); + let doc = udf.documentation().unwrap().clone(); + assert_eq!(doc.description, "test function"); + assert_eq!( + doc.arguments.unwrap(), + vec![ + ("a".to_string(), "int".to_string()), + ("b".to_string(), "string".to_string()), + ] + ); + + // missing param names + let remote_function = RemoteFunction { + function_type: FunctionType::Scalar, + name: "test".to_string(), + return_type: "string".to_string(), + param_names: None, + param_types: Some(vec![Some("int".to_string()), Some("string".to_string())]), + description: Some("test function".to_string()), + }; + + let udf = ByPassScalarUDF::from(remote_function); + assert_eq!(udf.name, "test"); + assert_eq!( + udf.return_type.to_data_type(&[DataType::Int64]).unwrap(), + DataType::Utf8 + ); + assert_eq!( + udf.signature.type_signature, + TypeSignature::OneOf(vec![TypeSignature::Exact(vec![ + DataType::Int32, + DataType::Utf8 + ])]) + ); + let doc = udf.documentation().unwrap().clone(); + assert_eq!(doc.description, "test function"); + assert_eq!(doc.arguments, None); + + // missing param types + let remote_function = RemoteFunction { + function_type: FunctionType::Scalar, + name: "test".to_string(), + return_type: "string".to_string(), + param_names: Some(vec![Some("a".to_string()), Some("b".to_string())]), + param_types: None, + description: Some("test function".to_string()), + }; + + let udf = ByPassScalarUDF::from(remote_function); + assert_eq!(udf.name, "test"); + assert_eq!( + udf.return_type.to_data_type(&[DataType::Int64]).unwrap(), + DataType::Utf8 + ); + assert_eq!( + udf.signature.type_signature, + TypeSignature::OneOf(vec![ + TypeSignature::Nullary, + TypeSignature::VariadicAny + ]) + ); + let doc = udf.documentation().unwrap().clone(); + assert_eq!(doc.description, "test function"); + assert_eq!( + doc.arguments.unwrap(), + vec![ + ("a".to_string(), "".to_string()), + ("b".to_string(), "".to_string()), + ] + ); + + // same as input + let remote_function = RemoteFunction { + function_type: FunctionType::Scalar, + name: "test".to_string(), + return_type: "same_as_input".to_string(), + param_names: Some(vec![Some("a".to_string())]), + param_types: Some(vec![Some("int".to_string())]), + description: Some("test function".to_string()), + }; + let udf = ByPassScalarUDF::from(remote_function); + assert_eq!(udf.name, "test"); + assert_eq!( + udf.return_type.to_data_type(&[DataType::Int64]).unwrap(), + DataType::Int64 + ); + assert_eq!( + udf.signature.type_signature, + TypeSignature::OneOf(vec![TypeSignature::Exact(vec![DataType::Int32])]) + ); + let doc = udf.documentation().unwrap().clone(); + assert_eq!(doc.description, "test function"); + assert_eq!( + doc.arguments.unwrap(), + vec![("a".to_string(), "int".to_string()),] + ); + + // same as input first array element + let remote_function = RemoteFunction { + function_type: FunctionType::Scalar, + name: "test".to_string(), + return_type: "same_as_input_first_array_element".to_string(), + param_names: Some(vec![Some("a".to_string())]), + param_types: Some(vec![Some("array".to_string())]), + description: Some("test function".to_string()), + }; + let udf = ByPassScalarUDF::from(remote_function); + let list_type = + DataType::List(Arc::new(Field::new("element", DataType::Int32, false))); + assert_eq!(udf.name, "test"); + assert_eq!( + udf.return_type.to_data_type(&[list_type.clone()]).unwrap(), + DataType::Int32 + ); + assert_eq!( + udf.signature.type_signature, + TypeSignature::OneOf(vec![TypeSignature::Exact(vec![list_type.clone()])]) + ); + let doc = udf.documentation().unwrap().clone(); + assert_eq!(doc.description, "test function"); + + // same as input missing param types + let remote_function = RemoteFunction { + function_type: FunctionType::Scalar, + name: "test".to_string(), + return_type: "same_as_input".to_string(), + param_names: Some(vec![Some("a".to_string())]), + param_types: None, + description: Some("test function".to_string()), + }; + let udf = ByPassScalarUDF::from(remote_function); + assert_eq!(udf.name, "test"); + assert_eq!( + udf.return_type.to_data_type(&[DataType::Int64]).unwrap(), + DataType::Int64 + ); + assert_eq!( + udf.signature.type_signature, + TypeSignature::OneOf(vec![ + TypeSignature::Nullary, + TypeSignature::VariadicAny + ]) + ); + let doc = udf.documentation().unwrap().clone(); + assert_eq!(doc.description, "test function"); + assert_eq!( + doc.arguments.unwrap(), + vec![("a".to_string(), "".to_string()),] + ); + Ok(()) + } } diff --git a/wren-core/core/src/mdl/utils.rs b/wren-core/core/src/mdl/utils.rs index 79939afe6..560eeca18 100644 --- a/wren-core/core/src/mdl/utils.rs +++ b/wren-core/core/src/mdl/utils.rs @@ -230,7 +230,13 @@ pub fn to_remote_field( let columns = collect_columns(expr); columns .into_iter() - .map(|c| Ok(Field::new(c.value, try_map_data_type(&column.r#type)?, false))) + .map(|c| { + Ok(Field::new( + c.value, + try_map_data_type(&column.r#type)?, + false, + )) + }) .collect::>() } else { Ok(vec![to_field(column)?]) From 6f272df67337b8b9cdd2b059edc8fdef0a1cd7df Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 15:11:19 +0800 Subject: [PATCH 10/24] review the list and add tool script --- .../resources/function_list/bigquery.csv | 47 +----- .../resources/function_list/canner.csv | 63 +------- .../resources/function_list/clickhouse.csv | 22 +-- ibis-server/resources/function_list/mssql.csv | 17 +-- ibis-server/resources/function_list/mysql.csv | 33 ---- .../resources/function_list/postgres.csv | 2 - .../resources/function_list/snowflake.csv | 141 ++++-------------- ibis-server/resources/function_list/trino.csv | 14 +- ibis-server/tools/remote_function_check.py | 42 ++++++ 9 files changed, 81 insertions(+), 300 deletions(-) create mode 100644 ibis-server/tools/remote_function_check.py diff --git a/ibis-server/resources/function_list/bigquery.csv b/ibis-server/resources/function_list/bigquery.csv index e4bde9110..7573b9b58 100644 --- a/ibis-server/resources/function_list/bigquery.csv +++ b/ibis-server/resources/function_list/bigquery.csv @@ -1,55 +1,15 @@ function_type,name,return_type,param_names,param_types,description aggregate,countif,int,,boolean,"Counts the rows where a condition is true." -aggregate,corr,float,,"float,float","Calculates the correlation coefficient of a set of numbers." -aggregate,covar_pop,float,,"float,float","Calculates the population covariance of a set of numbers." -aggregate,covar_samp,float,,"float,float","Calculates the sample covariance of a set of numbers." -aggregate,stddev_pop,float,,"float","Calculates the population standard deviation of a set of numbers." -aggregate,stddev_samp,float,,"float","Calculates the sample standard deviation of a set of numbers." -aggregate,var_pop,float,,"float","Calculates the population variance of a set of numbers." -aggregate,bit_and,int,,"int","Returns the bitwise AND of non-NULL input values." -aggregate,bit_or,int,,"int","Returns the bitwise OR of non-NULL input values." -aggregate,bit_xor,int,,"int","Returns the bitwise XOR of non-NULL input values." -aggregate,any_value,any,,"any","Returns any arbitrary value from the input values." -aggregate,array_agg,array,,"any","Aggregates values into an array." -aggregate,string_agg,text,,"text,text","Aggregates string values with a delimiter." -aggregate,count,int,,"any","Counts the number of rows." -aggregate,max,any,,"any","Returns the maximum value." -aggregate,min,any,,"any","Returns the minimum value." -aggregate,sum,any,,"any","Returns the sum of values." -aggregate,avg,float,,"int","Returns the average of values." -scalar,acos,float,,"float","Returns the arccosine of a number." -scalar,asin,float,,"float","Returns the arcsine of a number." -scalar,atan,float,,"float","Returns the arctangent of a number." -scalar,atan2,float,,"float,float","Returns the arctangent of two numbers." -scalar,cos,float,,"float","Returns the cosine of a number." -scalar,cosh,float,,"float","Returns the hyperbolic cosine of a number." -scalar,sin,float,,"float","Returns the sine of a number." -scalar,sinh,float,,"float","Returns the hyperbolic sine of a number." -scalar,tan,float,,"float","Returns the tangent of a number." -scalar,tanh,float,,"float","Returns the hyperbolic tangent of a number." -scalar,greatest,any,,"any","Returns the greatest value in a list of expressions." -scalar,least,any,,"any","Returns the least value in a list of expressions." +aggregate,any_value,same_as_input,,"any","Returns any arbitrary value from the input values." scalar,format,text,,"text","Formats values into a string." -scalar,lpad,text,,"text,int","Pads a string on the left to a certain length." -scalar,rpad,text,,"text,int","Pads a string on the right to a certain length." -scalar,left,text,,"text,int","Returns a substring from the beginning of a string." -scalar,right,text,,"text,int","Returns a substring from the end of a string." -scalar,array_length,int,,"array","Returns the length of an array." -scalar,array_reverse,array,,"array","Reverses the elements in an array." -scalar,array_concat,array,,"array,array","Concatenates multiple arrays into one." -scalar,array_to_string,text,,"array,text","Converts an array to a single string." scalar,safe_divide,float,,"float,float","Divides two numbers, returning NULL if the divisor is zero." scalar,safe_multiply,float,,"float,float","Multiplies two numbers, returning NULL if an overflow occurs." scalar,safe_add,float,,"float,float","Adds two numbers, returning NULL if an overflow occurs." scalar,safe_subtract,float,,"float,float","Subtracts two numbers, returning NULL if an overflow occurs." -scalar,abs,float,,"float","Returns the absolute value of a number." -scalar,floor,int,,"float","Rounds down to the nearest integer." -scalar,current_date,date,,"","Returns the current date." scalar,current_datetime,timestamp,,"","Returns current date and time." scalar,date_add,date,,"date,int","Adds a number of day to a date." scalar,date_sub,date,,"date,interval","Subtracts a specified interval from a date." scalar,date_diff,int,,"date,date,granularity","Returns the difference between two dates." -scalar,date_trunc,date,,"date,granularity","Truncates a date to a specified granularity." scalar,timestamp_add,timestamp,,"timestamp,granularity","Adds a specified interval to a timestamp." scalar,timestamp_sub,timestamp,,"timestamp,granularity","Subtracts a specified interval from a timestamp." scalar,timestamp_diff,int,,"timestamp,timestamp,granularity","Returns the difference between two timestamps." @@ -73,8 +33,3 @@ scalar,bool,boolean,,"any","Converts a JSON value to SQL boolean type." scalar,float64,float,,"any","Converts a JSON value to SQL float type." scalar,int64,int,,"any","Converts a JSON value to SQL int type." scalar,string,text,,"any","Converts a JSON value to SQL text type." -window,cume_dist,float,,"","Gets the cumulative distribution (relative position (0,1]) of each row within a window." -window,dense_rank,int,,"","Gets the dense rank (1-based, no gaps) of each row within a window." -window,percent_rank,float,,"","Gets the percentile rank (from 0 to 1) of each row within a window." -window,rank,int,,"","Gets the rank (1-based) of each row within a window." -window,row_number,int,,"","Gets the sequential row number (1-based) of each row within a window." diff --git a/ibis-server/resources/function_list/canner.csv b/ibis-server/resources/function_list/canner.csv index afb1154ae..b5473e55e 100644 --- a/ibis-server/resources/function_list/canner.csv +++ b/ibis-server/resources/function_list/canner.csv @@ -1,38 +1,14 @@ function_type,name,return_type,description aggregate,approx_percentile,same as input,"Approximates percentile" aggregate,approximate_distinct,bigint,"Approximates count of distinct values" -aggregate,array_agg,array,"Aggregates values into array" -aggregate,avg,double,"Returns average of values" -aggregate,corr,double,"Returns correlation coefficient" -aggregate,count,bigint,"Counts number of rows" -aggregate,covar_samp,double,"Returns sample covariance" aggregate,map_agg,map,"Aggregates key/value pairs into map" -aggregate,max,same as input,"Returns maximum value" -aggregate,min,same as input,"Returns minimum value" -aggregate,regr_intercept,double,"Returns linear regression intercept" aggregate,regr_slope,double,"Returns linear regression slope" -aggregate,string_agg,varchar,"Concatenates strings with delimiter" -aggregate,sum,numeric,"Returns sum of values" -scalar,abs,double,"Returns absolute value of the argument" -scalar,array_distinct,array,"Removes duplicate values from array" -scalar,array_join,varchar,"Joins array elements with delimiter" -scalar,array_sort,array,"Sorts array elements" -scalar,cardinality,bigint,"Returns size of array or map" -scalar,cbrt,double,"Returns cube root of the argument" -scalar,ceil,double,"Rounds up to nearest integer" -scalar,concat,varchar,"Concatenates given strings" -scalar,contains,boolean,"Checks if array contains element" -scalar,current_date,date,"Returns current date" scalar,date_add,date,"Adds interval to date" scalar,date_sub,date,"Subtracts interval from date" -scalar,date_trunc,timestamp,"Truncates timestamp to specified precision" -scalar,element_at,any,"Returns element at specified position in array" -scalar,filter,array,"Filters array using lambda" -scalar,floor,double,"Rounds down to nearest integer" +scalar,filter,same_as_input,"Filters array using lambda" scalar,format_datetime,varchar,"Formats datetime according to format string" scalar,from_base64,varbinary,"Converts base64 to binary" scalar,from_hex,varbinary,"Converts hex string to binary" -scalar,from_unixtime,timestamp,"Converts unix timestamp to timestamp" scalar,hamming_distance,bigint,"Calculates Hamming distance" scalar,is_finite,boolean,"Tests if value is finite" scalar,is_infinite,boolean,"Tests if value is infinite" @@ -40,47 +16,18 @@ scalar,is_nan,boolean,"Tests if value is NaN" scalar,json_extract,json,"Extracts JSON by JSONPath" scalar,json_format,varchar,"Pretty prints JSON" scalar,json_parse,json,"Parses string as JSON" -scalar,length,bigint,"Returns length of string" scalar,levenshtein_distance,bigint,"Calculates Levenshtein distance" -scalar,lower,varchar,"Converts string to lowercase" scalar,map_concat,map,"Concatenates two maps" -scalar,map_keys,array,"Returns array of map keys" -scalar,map_values,array,"Returns array of map values" -scalar,md5,varchar,"Computes MD5 hash" scalar,parse_datetime,timestamp,"Parses string to datetime using format" -scalar,reduce,any,"Reduces array to single value using lambda" +scalar,reduce,same_as_input,"Reduces array to single value using lambda" scalar,regexp_extract,varchar,"Extracts substring using regex" -scalar,regexp_like,boolean,"Tests if string matches regex" -scalar,regexp_replace,varchar,"Replaces substring using regex" -scalar,replace,varchar,"Replaces substring in string" -scalar,round,double,"Rounds to nearest integer or decimal places" -scalar,sha256,varchar,"Computes SHA256 hash" -scalar,split,array,"Splits string by delimiter into array" -scalar,split_part,varchar,"Returns specific part from split string" -scalar,strpos,bigint,"Returns position of substring" -scalar,substr,varchar,"Extracts substring from string" +scalar,split,array,"Splits string by delimiter into array" scalar,to_base64,varchar,"Converts binary to base64" -scalar,to_hex,varchar,"Converts number to hex string" -scalar,to_unixtime,double,"Converts timestamp to unix timestamp" -scalar,transform,array,"Applies lambda to each element" -scalar,trim,varchar,"Removes leading and trailing whitespace" -scalar,try,same as input,"Returns null if evaluation fails" -scalar,upper,varchar,"Converts string to uppercase" +scalar,transform,same_as_input,"Applies lambda to each element" +scalar,try,same_as_input,"Returns null if evaluation fails" scalar,url_decode,varchar,"Decodes URL encoded string" scalar,url_encode,varchar,"URL encodes string" -scalar,uuid,varchar,"Generates random UUID" scalar,word_stem,varchar,"Returns word stem (English only)" scalar,xxhash64,bigint,"Computes xxHash64 hash" scalar,zip_with,array,"Combines two arrays using lambda" -window,cume_dist,double,"Returns cumulative distribution" -window,dense_rank,bigint,"Returns rank without gaps" -window,first_value,any,"Returns first value in window" -window,lag,any,"Returns value from previous row" -window,last_value,any,"Returns last value in window" window,lead,any,"Returns value from following row" -window,nth_value,any,"Returns nth value in window" -window,nth_value,any,"Returns value at specified row" -window,ntile,bigint,"Divides rows into buckets" -window,percent_rank,double,"Returns percent rank of row" -window,rank,bigint,"Returns rank with gaps" -window,row_number,bigint,"Returns sequential row number" diff --git a/ibis-server/resources/function_list/clickhouse.csv b/ibis-server/resources/function_list/clickhouse.csv index 92342b001..bdd236ca9 100644 --- a/ibis-server/resources/function_list/clickhouse.csv +++ b/ibis-server/resources/function_list/clickhouse.csv @@ -1,26 +1,6 @@ function_type,name,return_type,param_names,param_types,description -aggregate,uniq,UInt64,,Any,"Approximate number of different values using HyperLogLog." -scalar,abs,Numeric,,Numeric,"Returns absolute value." -scalar,round,Float,,Float, "Rounds number to specified decimal places." -scalar,floor,Float,,Float,"Rounds down to nearest integer." -scalar,ceil,Float,,Float,"Rounds up to nearest integer." -scalar,exp,Float,,Float,"Returns e raised to the power of x." -scalar,log,Float,,Float,"Returns natural logarithm." -scalar,log2,Float,,Float,"Returns base-2 logarithm." -scalar,log10,Float,,Float,"Returns base-10 logarithm." -scalar,sqrt,Float,,Float,"Returns square root." +aggregate,uniq,UInt64,,,"Approximate number of different values using HyperLogLog." scalar,rand,UInt32,,,"Returns random number." scalar,rand64,UInt64,,,"Returns random 64-bit number." -scalar,pi,Float,,,"Returns value of π." scalar,e,Float,,,"Returns value of e." -scalar,concat,String,,Array, "Concatenates strings." -scalar,length,UInt64,,String,"Returns string length in bytes." -scalar,lower,String,,String,"Converts string to lowercase." -scalar,upper,String,,String,"Converts string to uppercase." -scalar,ltrim,String,,String,"Removes leading whitespace." -scalar,rtrim,String,,String,"Removes trailing whitespace." -scalar,reverse,String,,String,"Reverses string." -scalar,empty,UInt8,,Array, "Checks if array is empty." -scalar,now,DateTime,,,"Returns current date and time." -scalar,today,Date,,,"Returns current date." scalar,yesterday,Date,,,"Returns yesterday's date." diff --git a/ibis-server/resources/function_list/mssql.csv b/ibis-server/resources/function_list/mssql.csv index 79a62036b..04e43a25c 100644 --- a/ibis-server/resources/function_list/mssql.csv +++ b/ibis-server/resources/function_list/mssql.csv @@ -1,12 +1,7 @@ function_type,name,return_type,param_names,param_types,description -scalar,ceil,Numeric,,decimal,"Returns smallest integer greater than number." -scalar,floor,Numeric,,decimal,"Returns largest integer less than number." -scalar,pi,Float,,, "Returns value of PI." -scalar,getdate,Datetime,,, "Returns current date and time." -scalar,getutcdate,Datetime,,, "Returns current UTC date and time." -scalar,sysdatetime,Datetime,,, "Returns date and time of SQL Server." -scalar,host_name,String,,, "Returns workstation name." -scalar,newid,String,,, "Returns new GUID." -scalar,user_name,String,,, "Returns database user name." -scalar,upper,String,,varchar,"Converts string to uppercase." -scalar,lower,String,,varchar,"Converts string to lowercase." +scalar,getdate,Datetime,,,"Returns current date and time." +scalar,getutcdate,Datetime,,,"Returns current UTC date and time." +scalar,sysdatetime,Datetime,,,"Returns date and time of SQL Server." +scalar,host_name,String,,,"Returns workstation name." +scalar,newid,String,,,"Returns new GUID." +scalar,user_name,String,,,"Returns database user name." diff --git a/ibis-server/resources/function_list/mysql.csv b/ibis-server/resources/function_list/mysql.csv index 80ce9ea1d..2ffd1b522 100644 --- a/ibis-server/resources/function_list/mysql.csv +++ b/ibis-server/resources/function_list/mysql.csv @@ -1,59 +1,26 @@ function_type,name,return_type,param_names,param_types,description -scalar,abs,int,,int,"Returns the absolute value of a number" -scalar,concat,varchar,,"varchar,varchar","Concatenates two or more strings together" -scalar,substr,varchar,,"varchar,int,int","Returns a substring from a string starting at a specified position" -scalar,upper,varchar,,"varchar","Converts a string to uppercase" -scalar,lower,varchar,,"varchar","Converts a string to lowercase" -scalar,date_format,varchar,,"date,varchar","Formats a date according to the specified format" -scalar,coalesce,any,,"any,any","Returns the first non-null value in a list" scalar,if,any,,"boolean,any,any","Returns one value if a condition is TRUE, or another value if a condition is FALSE" -scalar,round,decimal,,"decimal,int","Rounds a number to a specified number of decimal places" -scalar,floor,int,,"decimal","Returns the largest integer value less than or equal to a number" scalar,ceiling,int,,"decimal","Returns the smallest integer value greater than or equal to a number" -scalar,length,int,,"varchar","Returns the length of a string in bytes" -scalar,char_length,int,,"varchar","Returns the length of a string in characters" scalar,datediff,int,,"date,date","Returns the number of days between two dates" scalar,timestampdiff,int,,"varchar,datetime,datetime","Returns the difference between two datetime expressions" -scalar,uuid,varchar,,,,"Returns a Universal Unique Identifier" -scalar,replace,varchar,,"varchar,varchar,varchar","Replaces all occurrences of a substring within a string" scalar,inet_aton,int,,"varchar","Converts an IPv4 address to numeric value" scalar,inet_ntoa,varchar,,"int","Converts numeric value to IPv4 address" -scalar,find_in_set,int,,"varchar,varchar","Returns position of string within a comma-separated list" scalar,format,varchar,,"decimal,int","Formats number to specified decimal places and adds thousand separators" -scalar,from_unixtime,datetime,,"int","Converts Unix timestamp to datetime" scalar,hex,varchar,,"decimal_or_string","Returns hexadecimal representation of a decimal or string value" scalar,unhex,varchar,,"varchar","Converts hexadecimal value to string" scalar,lcase,varchar,,"varchar","Synonym for LOWER()" -scalar,left,varchar,,"varchar,int","Returns the leftmost number of characters from string" -scalar,right,varchar,,"varchar,int","Returns the rightmost number of characters from string" -scalar,lpad,varchar,,"varchar,int,varchar","Left-pads string with specified characters to certain length" -scalar,rpad,varchar,,"varchar,int,varchar","Right-pads string with specified characters to certain length" -scalar,ltrim,varchar,,"varchar","Removes leading spaces from string" -scalar,rtrim,varchar,,"varchar","Removes trailing spaces from string" -scalar,trim,varchar,,"varchar","Removes leading and trailing spaces from string" -scalar,power,decimal,,"decimal,decimal","Returns the value of number raised to specified power" scalar,quote,varchar,,"varchar","Escapes string and adds single quotes" -scalar,repeat,varchar,,"varchar,int","Repeats a string specified number of times" -scalar,reverse,varchar,,"varchar","Reverses the characters in a string" scalar,soundex,varchar,,"varchar","Returns soundex string of given string" scalar,space,varchar,,"int","Returns string of specified number of spaces" scalar,truncate,decimal,,"decimal,int","Truncates number to specified number of decimal places" scalar,weekday,int,,"date","Returns weekday index (0=Monday, 6=Sunday)" scalar,yearweek,int,,"date","Returns year and week number" -scalar,now,datetime,,,,"Synonym for CURRENT_TIMESTAMP()" scalar,dayname,varchar,,"date","Returns name of weekday" scalar,monthname,varchar,,"date","Returns name of month" scalar,quarter,int,,"date","Returns quarter from date (1 to 4)" scalar,week,int,,"date","Returns week number" -aggregate,count,int,,"any","Returns the number of rows in a group" -aggregate,sum,decimal/int,,"any","Returns the sum of values in a group" -aggregate,min,any,,"any","Returns the minimum value in a group" -aggregate,max,any,,"any","Returns the maximum value in a group" aggregate,group_concat,varchar,,"any","Returns a concatenated string from a group" aggregate,std,decimal,,"any","Returns the population standard deviation" aggregate,variance,decimal,,"any","Returns the population variance" -aggregate,bit_and,int,,"int","Returns the bitwise AND of all bits in a group" -aggregate,bit_or,int,,"int","Returns the bitwise OR of all bits in a group" -aggregate,bit_xor,int,,"int","Returns the bitwise XOR of all bits in a group" aggregate,json_arrayagg,json,,"any","Aggregates result set as JSON array" aggregate,json_objectagg,json,,"varchar,any","Aggregates result set as JSON object" diff --git a/ibis-server/resources/function_list/postgres.csv b/ibis-server/resources/function_list/postgres.csv index fc0ce371f..77f3719a5 100644 --- a/ibis-server/resources/function_list/postgres.csv +++ b/ibis-server/resources/function_list/postgres.csv @@ -11,7 +11,6 @@ scalar,convert_from,text,,"bytea,text","Convert from encoding" scalar,convert_to,bytea,,"text,text","Convert to encoding" scalar,extract,numeric,,"text,timestamp","Get subfield from date/time" scalar,format,text,,"text,array","Format string" -scalar,greatest,any,,array,"Greatest of arguments" scalar,host,text,,inet,"Extract host from IP address" scalar,isfinite,boolean,,timestamp,"Test for finite date/timestamp/interval" scalar,json_array_length,int,,json,"Length of JSON array" @@ -20,7 +19,6 @@ scalar,json_object_keys,array,,json,"Get JSON object keys" scalar,jsonb_array_length,int,,jsonb,"Length of JSONB array" scalar,jsonb_extract_path,jsonb,,"jsonb,array","Get JSONB object at path" scalar,jsonb_object_keys,array,,jsonb,"Get JSONB object keys" -scalar,least,any,,array,"Least of arguments" scalar,mod,numeric,,"numeric,numeric","Modulo (remainder)" scalar,parse_ident,array,,"text,boolean","Parse qualified identifier" scalar,pg_client_encoding,name,,,"Current client encoding" diff --git a/ibis-server/resources/function_list/snowflake.csv b/ibis-server/resources/function_list/snowflake.csv index 14cd403c4..c62bcf079 100644 --- a/ibis-server/resources/function_list/snowflake.csv +++ b/ibis-server/resources/function_list/snowflake.csv @@ -1,79 +1,32 @@ function_type,name,return_type,description -aggregate,any_value,same as input,"Returns any value from the group" -aggregate,approx_count_distinct,number,"Approximates count of distinct values" -aggregate,approx_percentile,number,"Approximates percentile of expression" -aggregate,array_agg,array,"Aggregates values into array" -aggregate,avg,number,"Returns average of values" -aggregate,corr,number,"Returns correlation coefficient" -aggregate,count,number,"Counts number of rows" -aggregate,covar_pop,number,"Returns population covariance" -aggregate,covar_samp,number,"Returns sample covariance" -aggregate,hll,hll,"Returns HyperLogLog object for approximating COUNT(DISTINCT)" +aggregate,any_value,same_as_input,"Returns any value from the group" +aggregate,approx_count_distinct,numeric,"Approximates count of distinct values" +aggregate,approx_percentile,numeric,"Approximates percentile of expression" aggregate,listagg,string,"Concatenates values with delimiter" -aggregate,max,same as input,"Returns maximum value" -aggregate,median,number,"Returns median value" -aggregate,min,same as input,"Returns minimum value" -aggregate,mode,same as input,"Returns most frequent value" +aggregate,mode,same_as_input,"Returns most frequent value" aggregate,object_agg,object,"Aggregates key/value pairs into object" -aggregate,regr_avgx,number,"Returns average of independent variable" -aggregate,regr_avgy,number,"Returns average of dependent variable" -aggregate,regr_count,number,"Returns count of pairs used for linear regression" -aggregate,regr_intercept,number,"Returns linear regression intercept" -aggregate,regr_r2,number,"Returns coefficient of determination" -aggregate,regr_slope,number,"Returns linear regression slope" -aggregate,regr_sxx,number,"Returns sum of squares of independent variable" -aggregate,regr_sxy,number,"Returns sum of products of paired values" -aggregate,regr_syy,number,"Returns sum of squares of dependent variable" -aggregate,stddev,number,"Returns standard deviation" -aggregate,stddev_pop,number,"Returns population standard deviation" -aggregate,stddev_samp,number,"Returns sample standard deviation" -aggregate,sum,number,"Returns sum of values" -aggregate,var_pop,number,"Returns population variance" -aggregate,var_samp,number,"Returns sample variance" -aggregate,variance,number,"Returns variance" -scalar,abs,number,"Returns absolute value" -scalar,acos,number,"Returns arc cosine" +aggregate,variance,numeric,"Returns variance" scalar,add_months,date,"Adds months to date" scalar,array_compact,array,"Removes null values from array" -scalar,array_contains,boolean,"Checks if array contains value" scalar,array_insert,array,"Inserts element into array" -scalar,array_size,number,"Returns number of elements in array" -scalar,ascii,number,"Returns ASCII code of first character" -scalar,asin,number,"Returns arc sine" -scalar,atan,number,"Returns arc tangent" -scalar,atan2,number,"Returns arc tangent of y/x" +scalar,array_size,numeric,"Returns number of elements in array" scalar,base64_encode,string,"Encodes binary to base64" scalar,base64_decode,binary,"Decodes base64 to binary" -scalar,cbrt,number,"Returns cube root" -scalar,ceil,number,"Rounds up to nearest integer" scalar,char,string,"Returns character from ASCII code" -scalar,charindex,number,"Returns position of substring" -scalar,coalesce,same as input,"Returns first non-null value" -scalar,concat,string,"Concatenates strings" -scalar,concat_ws,string,"Concatenates with separator" -scalar,cos,number,"Returns cosine" -scalar,cosh,number,"Returns hyperbolic cosine" -scalar,current_date,date,"Returns current date" -scalar,current_time,time,"Returns current time" -scalar,date_add,date/timestamp,"Adds interval to date" -scalar,date_sub,date/timestamp,"Subtracts interval from date" -scalar,date_trunc,date/timestamp,"Truncates date to specified precision" +scalar,charindex,numeric,"Returns position of substring" +scalar,date_add,timestamp,"Adds interval to date" +scalar,date_sub,timestamp,"Subtracts interval from date" scalar,dayname,string,"Returns name of day" -scalar,dayofmonth,number,"Returns day of month (1-31)" -scalar,dayofweek,number,"Returns day of week (0-6)" -scalar,dayofyear,number,"Returns day of year (1-366)" -scalar,degrees,number,"Converts radians to degrees" -scalar,div0,number,"Divides with zero handling" -scalar,exp,number,"Returns e raised to power" -scalar,extract,number,"Extracts part from date/time" -scalar,floor,number,"Rounds down to nearest integer" +scalar,dayofmonth,numeric,"Returns day of month (1-31)" +scalar,dayofweek,numeric,"Returns day of week (0-6)" +scalar,dayofyear,numeric,"Returns day of year (1-366)" +scalar,div0,numeric,"Divides with zero handling" +scalar,extract,numeric,"Extracts part from date/time" scalar,get_path,variant,"Returns value from path in variant" -scalar,greatest,same as input,"Returns greatest value" -scalar,hash,number,"Returns hash value" +scalar,hash,numeric,"Returns hash value" scalar,hex_encode,string,"Encodes binary to hex" scalar,hex_decode,binary,"Decodes hex to binary" -scalar,ifnull,same as input,"Returns first non-null value" -scalar,iff,same as input,"Returns value based on condition" +scalar,iff,same_as_input,"Returns value based on condition" scalar,is_array,boolean,"Tests if value is array" scalar,is_decimal,boolean,"Tests if value is decimal" scalar,is_integer,boolean,"Tests if value is integer" @@ -83,77 +36,33 @@ scalar,is_real,boolean,"Tests if value is real" scalar,is_string,boolean,"Tests if value is string" scalar,json_extract_path_text,string,"Extracts text from JSON path" scalar,last_day,date,"Returns last day of month" -scalar,least,same as input,"Returns least value" -scalar,left,string,"Returns leftmost characters" -scalar,len,number,"Returns length of string" -scalar,ln,number,"Returns natural logarithm" -scalar,log,number,"Returns logarithm" -scalar,lower,string,"Converts to lowercase" -scalar,lpad,string,"Left pads string" -scalar,ltrim,string,"Removes leading spaces" -scalar,md5,string,"Returns MD5 hash" -scalar,months_between,number,"Returns months between dates" +scalar,len,numeric,"Returns length of string" +scalar,months_between,numeric,"Returns months between dates" scalar,monthname,string,"Returns name of month" -scalar,nvl,same as input,"Returns first non-null value" -scalar,nvl2,same as input,"Returns value based on null test" scalar,object_construct,object,"Creates object from key-value pairs" scalar,object_delete,object,"Deletes key from object" scalar,object_insert,object,"Inserts key-value into object" -scalar,parse_json,variant,"Parses JSON string" +scalar,parse_json,string,"Parses JSON string" scalar,parse_url,string,"Parses URL components" -scalar,parse_xml,variant,"Parses XML string" -scalar,power,number,"Returns number raised to power" -scalar,radians,number,"Converts degrees to radians" -scalar,random,number,"Returns random number" -scalar,regexp_count,number,"Counts regex matches" -scalar,regexp_replace,string,"Replaces regex matches" +scalar,parse_xml,string,"Parses XML string" scalar,regexp_substr,string,"Extracts regex match" -scalar,replace,string,"Replaces substring" -scalar,right,string,"Returns rightmost characters" -scalar,round,number,"Rounds number" -scalar,rpad,string,"Right pads string" -scalar,rtrim,string,"Removes trailing spaces" scalar,sha1,string,"Returns SHA1 hash" scalar,sha2,string,"Returns SHA2 hash" -scalar,sin,number,"Returns sine" -scalar,sinh,number,"Returns hyperbolic sine" scalar,split,array,"Splits string into array" -scalar,split_part,string,"Returns part of split string" -scalar,sqrt,number,"Returns square root" scalar,startswith,boolean,"Tests if string starts with prefix" -scalar,strip_null_value,variant,"Removes SQL NULL from variant" -scalar,substr,string,"Returns substring" -scalar,tan,number,"Returns tangent" -scalar,tanh,number,"Returns hyperbolic tangent" scalar,time_slice,timestamp,"Truncates timestamp to interval" scalar,timeadd,timestamp,"Adds interval to timestamp" scalar,to_array,array,"Converts value to array" -scalar,to_date,date,"Converts string to date" scalar,to_json,string,"Converts to JSON string" -scalar,to_number,number,"Converts to number" +scalar,to_number,numeric,"Converts to number" scalar,to_object,object,"Converts to object" scalar,to_time,time,"Converts to time" -scalar,to_timestamp,timestamp,"Converts to timestamp" scalar,to_variant,variant,"Converts to variant" scalar,to_xml,string,"Converts to XML string" -scalar,trim,string,"Removes leading/trailing spaces" -scalar,trunc,number,"Truncates number" -scalar,try_cast,varies,"Safe type conversion" +scalar,try_cast,any,"Safe type conversion" scalar,try_to_date,date,"Safe date conversion" -scalar,try_to_number,number,"Safe number conversion" +scalar,try_to_number,numeric,"Safe number conversion" scalar,try_to_timestamp,timestamp,"Safe timestamp conversion" -scalar,upper,string,"Converts to uppercase" scalar,uuid_string,string,"Generates UUID" -scalar,xmlget,variant,"Gets XML element" -window,cume_dist,number,"Returns cumulative distribution" -window,dense_rank,number,"Returns rank without gaps" -window,first_value,same as input,"Returns first value" -window,lag,same as input,"Returns previous row value" -window,last_value,same as input,"Returns last value" -window,lead,same as input,"Returns next row value" -window,nth_value,same as input,"Returns nth value" -window,ntile,number,"Divides rows into buckets" -window,percent_rank,number,"Returns percent rank" -window,rank,number,"Returns rank with gaps" -window,ratio_to_report,number,"Returns ratio to sum" -window,row_number,number,"Returns row number" +scalar,xmlget,xml,"Gets XML element" +window,ratio_to_report,numeric,"Returns ratio to sum" diff --git a/ibis-server/resources/function_list/trino.csv b/ibis-server/resources/function_list/trino.csv index 65e8ae950..cbac4f1a5 100644 --- a/ibis-server/resources/function_list/trino.csv +++ b/ibis-server/resources/function_list/trino.csv @@ -1,22 +1,10 @@ function_type,name,return_type,param_names,param_types,description -scalar,array_distinct,array,,array,Removes duplicate values from array -scalar,array_sort,array,,array,Sorts array elements -scalar,cardinality,bigint,,array or map,Returns size of array or map -scalar,cbrt,double,,double,Returns cube root of the argument -scalar,ceil,double,,double or decimal,Rounds up to nearest integer -scalar,floor,double,,double or decimal,Rounds down to nearest integer scalar,from_base64,varbinary,,varchar,Converts base64 to binary -scalar,from_unixtime,timestamp,,double,Converts unix timestamp to timestamp scalar,is_finite,boolean,,double or decimal,Tests if value is finite scalar,is_infinite,boolean,,double or decimal,Tests if value is infinite scalar,is_nan,boolean,,double or decimal,Tests if value is NaN -scalar,map_keys,array,,map,Returns array of map keys -scalar,map_values,array,,map,Returns array of map values -scalar,round,double,,double or decimal,integer,Rounds to nearest integer or decimal places scalar,to_base64,varchar,,varbinary,Converts binary to base64 -scalar,to_unixtime,double,,timestamp,Converts timestamp to unix timestamp -scalar,try,same as input,,any,Returns null if evaluation fails -scalar,upper,varchar,,varchar,Converts string to uppercase +scalar,try,same_as_input,,any,Returns null if evaluation fails scalar,url_decode,varchar,,varchar,Decodes URL encoded string scalar,url_encode,varchar,,varchar,URL encodes string scalar,word_stem,varchar,,varchar,Returns word stem (English only) diff --git a/ibis-server/tools/remote_function_check.py b/ibis-server/tools/remote_function_check.py new file mode 100644 index 000000000..2efa1fc60 --- /dev/null +++ b/ibis-server/tools/remote_function_check.py @@ -0,0 +1,42 @@ +# +# This script is used to check for duplicate functions in the function list and remove them from the csv file. +# It also creates a backup of the original csv file with the `.bkup` suffix. +# You can removed the backup file after you are sure that the script works as expected. +# + +import argparse +import os +import wren_core + +# Set up argument parsing +parser = argparse.ArgumentParser(description="Find the duplicate function in the function list") +parser.add_argument("path", help="Path to the csv file") + +args = parser.parse_args() + +ctx = wren_core.SessionContext() +functions = ctx.get_available_functions() +# extract the function names be a set +function_names = set() +for function in functions: + function_names.add(function.name) +print("Default Function count: ", len(function_names)) +print("Function is already in the function list:") +# read the csv file +collection = [] +with open(args.path, "r") as file: + # rename the file with `.bkup` suffix + backup_path = args.path + ".bkup" + os.rename(args.path, backup_path) + # create a new file with the same name + with open(args.path, "w") as new_file: + # if the function name is not in the function names, write it to the new file + for line in file: + if line.startswith("#"): + new_file.write(line) + continue + function_name = line.split(",")[1] + if function_name not in function_names: + new_file.write(line) + collection.append(function_name) + print("valid remote function: ", ",".join(collection)) From d81ed4c1d2444335c0bf204cf3a485d5bf971e3b Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 15:38:21 +0800 Subject: [PATCH 11/24] fix postgres function test --- .../tests/routers/v3/connector/postgres/test_functions.py | 2 +- wren-core/core/src/mdl/function.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py index 2f92b95af..198753ae5 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py @@ -63,7 +63,7 @@ async def test_function_list(client): "name": "extract", "description": "Get subfield from date/time", "function_type": "scalar", - "param_names": '', + "param_names": None, "param_types": "Utf8,Timestamp(Nanosecond, None)", "return_type": "Decimal128(38, 10)", } diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 942d57a29..54f2bcd34 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -45,7 +45,7 @@ impl RemoteFunction { fn transform_param_type(param_types: &[Option]) -> Option> { let types = param_types .iter() - .map(|t| t.clone().map(|x| map_data_type(x.as_str()).ok())) + .map(|t| t.clone().map(|x| map_data_type(x.as_str()).ok()).flatten()) .collect::>(); if types.iter().any(|x| x.is_none()) { return None; @@ -53,7 +53,7 @@ impl RemoteFunction { Some( types .into_iter() - .map(|x| x.flatten().unwrap().clone()) + .flatten() .collect(), ) } From 66ec74dae0c2d0860da2e0888b845374c1c5743e Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 15:45:13 +0800 Subject: [PATCH 12/24] fix function for bigquery --- .../resources/function_list/bigquery.csv | 18 +++++++++--------- .../v3/connector/bigquery/test_functions.py | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/ibis-server/resources/function_list/bigquery.csv b/ibis-server/resources/function_list/bigquery.csv index 7573b9b58..57c3c7452 100644 --- a/ibis-server/resources/function_list/bigquery.csv +++ b/ibis-server/resources/function_list/bigquery.csv @@ -1,5 +1,5 @@ function_type,name,return_type,param_names,param_types,description -aggregate,countif,int,,boolean,"Counts the rows where a condition is true." +aggregate,countif,int64,,boolean,"Counts the rows where a condition is true." aggregate,any_value,same_as_input,,"any","Returns any arbitrary value from the input values." scalar,format,text,,"text","Formats values into a string." scalar,safe_divide,float,,"float,float","Divides two numbers, returning NULL if the divisor is zero." @@ -7,16 +7,16 @@ scalar,safe_multiply,float,,"float,float","Multiplies two numbers, returning NUL scalar,safe_add,float,,"float,float","Adds two numbers, returning NULL if an overflow occurs." scalar,safe_subtract,float,,"float,float","Subtracts two numbers, returning NULL if an overflow occurs." scalar,current_datetime,timestamp,,"","Returns current date and time." -scalar,date_add,date,,"date,int","Adds a number of day to a date." +scalar,date_add,date,,"date,int64","Adds a number of day to a date." scalar,date_sub,date,,"date,interval","Subtracts a specified interval from a date." -scalar,date_diff,int,,"date,date,granularity","Returns the difference between two dates." +scalar,date_diff,int64,,"date,date,granularity","Returns the difference between two dates." scalar,timestamp_add,timestamp,,"timestamp,granularity","Adds a specified interval to a timestamp." scalar,timestamp_sub,timestamp,,"timestamp,granularity","Subtracts a specified interval from a timestamp." -scalar,timestamp_diff,int,,"timestamp,timestamp,granularity","Returns the difference between two timestamps." +scalar,timestamp_diff,int64,,"timestamp,timestamp,granularity","Returns the difference between two timestamps." scalar,timestamp_trunc,timestamp,,"timestamp,granularity","Truncates a timestamp to a specified granularity." -scalar,timestamp_micros,timestamp,,"int","Converts the number of microseconds since 1970-01-01 00:00:00 UTC to a TIMESTAMP." -scalar,timestamp_millis,timestamp,,"int","Converts the number of milliseconds since 1970-01-01 00:00:00 UTC to a TIMESTAMP." -scalar,timestamp_seconds,timestamp,,"int","Converts the number of seconds since 1970-01-01 00:00:00 UTC to a TIMESTAMP." +scalar,timestamp_micros,timestamp,,"int64","Converts the number of microseconds since 1970-01-01 00:00:00 UTC to a TIMESTAMP." +scalar,timestamp_millis,timestamp,,"int64","Converts the number of milliseconds since 1970-01-01 00:00:00 UTC to a TIMESTAMP." +scalar,timestamp_seconds,timestamp,,"int64","Converts the number of seconds since 1970-01-01 00:00:00 UTC to a TIMESTAMP." scalar,format_date,string,,"string,date","Formats a date according to the specified format string." scalar,format_timestamp,string,,"string,timestamp","Formats a timestamp according to the specified format string." scalar,parse_date,date,,"text,text","Parses a date from a string." @@ -27,9 +27,9 @@ scalar,json_query_array,array,,"json,text","Extracts a JSON array from a JSON st scalar,json_value_array,array,,"json,text","Extracts an array of scalar JSON values as strings." scalar,lax_bool,boolean,,"any","Converts a value to boolean with relaxed type checking." scalar,lax_float64,float,,"any","Converts a value to float with relaxed type checking." -scalar,lax_int64,int,,"any","Converts a value to int with relaxed type checking." +scalar,lax_int64,int64,,"any","Converts a value to int with relaxed type checking." scalar,lax_string,text,,"any","Converts a value to text with relaxed type checking." scalar,bool,boolean,,"any","Converts a JSON value to SQL boolean type." scalar,float64,float,,"any","Converts a JSON value to SQL float type." -scalar,int64,int,,"any","Converts a JSON value to SQL int type." +scalar,int64,int64,,"any","Converts a JSON value to SQL int type." scalar,string,text,,"any","Converts a JSON value to SQL text type." diff --git a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py index 4596c860e..547f25c6c 100644 --- a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py @@ -48,14 +48,14 @@ async def test_function_list(client): assert response.status_code == 200 result = response.json() assert len(result) == DATAFUSION_FUNCTION_COUNT + 34 - the_func = next(filter(lambda x: x["name"] == "string_agg", result)) + the_func = next(filter(lambda x: x["name"] == "string_agg" and x['param_types'] == "LargeUtf8,LargeUtf8", result)) assert the_func == { "name": "string_agg", - "description": "Aggregates string values with a delimiter.", + "description": "Concatenates the values of string expressions and places separator values between them.", "function_type": "aggregate", - "param_names": None, - "param_types": "text,text", - "return_type": "text", + "param_names": "expression,delimiter", + "param_types": "LargeUtf8,LargeUtf8", + "return_type": "LargeUtf8", } config.set_remote_function_list_path(None) From 044c94da9840c30cb03637fb79bcc99aeba9e61b Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 15:55:49 +0800 Subject: [PATCH 13/24] fix clickhouse test --- .../v3/connector/clickhouse/test_functions.py | 12 ++++++------ wren-core/core/src/logical_plan/utils.rs | 15 ++++++++------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py b/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py index 3b50a5e59..6c5fd009f 100644 --- a/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py @@ -56,14 +56,14 @@ async def test_function_list(client): assert response.status_code == 200 result = response.json() assert len(result) == DATAFUSION_FUNCTION_COUNT + 5 - the_func = next(filter(lambda x: x["name"] == "abs", result)) + the_func = next(filter(lambda x: x["name"] == "uniq", result)) assert the_func == { - "name": "abs", - "description": "Returns absolute value.", - "function_type": "scalar", + "name": "uniq", + "description": "Approximate number of different values using HyperLogLog.", + "function_type": "aggregate", "param_names": None, - "param_types": "Numeric", - "return_type": "Numeric", + "param_types": None, + "return_type": None, } config.set_remote_function_list_path(None) diff --git a/wren-core/core/src/logical_plan/utils.rs b/wren-core/core/src/logical_plan/utils.rs index 3ac8c03e3..1a43035e1 100644 --- a/wren-core/core/src/logical_plan/utils.rs +++ b/wren-core/core/src/logical_plan/utils.rs @@ -103,17 +103,17 @@ pub fn try_map_data_type(data_type: &str) -> Result { pub fn map_data_type(data_type: &str) -> Result { let lower = data_type.to_lowercase(); - let data_type = lower.as_str(); + let lower_data_type = lower.as_str(); // TODO: try parse nested type by arrow // Currently, we don't care about the element type of the array or struct. // We only care about the array or struct itself. - if data_type.starts_with("array") { - return create_list_type(data_type); + if lower_data_type.starts_with("array") { + return create_list_type(lower_data_type); } - if data_type.starts_with("struct") { - return create_struct_type(data_type); + if lower_data_type.starts_with("struct") { + return create_struct_type(lower_data_type); } - let result = match data_type { + let result = match lower_data_type { // Wren Definition Types "bool" | "boolean" => DataType::Boolean, "tinyint" => DataType::Int8, @@ -159,7 +159,8 @@ pub fn map_data_type(data_type: &str) -> Result { "time" => DataType::Time32(TimeUnit::Nanosecond), // chose the smallest time unit "null" => DataType::Null, _ => { - debug!("try parse by arrow {}", data_type); + debug!("try parse by arrow {}", lower_data_type); + // the from_str is case sensitive, so we need to use the original string DataType::from_str(data_type)? } }; From 82190c55863f09a86fc2fb45e3bcf4e8c38f4172 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 15:58:35 +0800 Subject: [PATCH 14/24] fix mssql function test --- .../tests/routers/v3/connector/mssql/test_functions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/mssql/test_functions.py b/ibis-server/tests/routers/v3/connector/mssql/test_functions.py index 8e9716a0b..adf86762c 100644 --- a/ibis-server/tests/routers/v3/connector/mssql/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/mssql/test_functions.py @@ -56,14 +56,14 @@ async def test_function_list(client): assert response.status_code == 200 result = response.json() assert len(result) == DATAFUSION_FUNCTION_COUNT + 6 - the_func = next(filter(lambda x: x["name"] == "floor", result)) + the_func = next(filter(lambda x: x["name"] == "sysdatetime", result)) assert the_func == { - "name": "floor", - "description": "Returns largest integer less than number.", + "name": "sysdatetime", + "description": "Returns date and time of SQL Server.", "function_type": "scalar", "param_names": None, - "param_types": "decimal", - "return_type": "Numeric", + "param_types": None, + "return_type": None, } config.set_remote_function_list_path(None) From 25cc3f2388fb0c2bf73404a6d392f11e635b9416 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 16:01:57 +0800 Subject: [PATCH 15/24] fix mysql test --- ibis-server/resources/function_list/mysql.csv | 2 +- .../tests/routers/v3/connector/mysql/test_functions.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ibis-server/resources/function_list/mysql.csv b/ibis-server/resources/function_list/mysql.csv index 2ffd1b522..6f812011a 100644 --- a/ibis-server/resources/function_list/mysql.csv +++ b/ibis-server/resources/function_list/mysql.csv @@ -1,5 +1,5 @@ function_type,name,return_type,param_names,param_types,description -scalar,if,any,,"boolean,any,any","Returns one value if a condition is TRUE, or another value if a condition is FALSE" +scalar,if,bool,,"boolean,any,any","Returns one value if a condition is TRUE, or another value if a condition is FALSE" scalar,ceiling,int,,"decimal","Returns the smallest integer value greater than or equal to a number" scalar,datediff,int,,"date,date","Returns the number of days between two dates" scalar,timestampdiff,int,,"varchar,datetime,datetime","Returns the difference between two datetime expressions" diff --git a/ibis-server/tests/routers/v3/connector/mysql/test_functions.py b/ibis-server/tests/routers/v3/connector/mysql/test_functions.py index adfbc27d2..4df5ce56c 100644 --- a/ibis-server/tests/routers/v3/connector/mysql/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/mysql/test_functions.py @@ -57,14 +57,14 @@ async def test_function_list(client): assert response.status_code == 200 result = response.json() assert len(result) == DATAFUSION_FUNCTION_COUNT + 25 - the_func = next(filter(lambda x: x["name"] == "abs", result)) + the_func = next(filter(lambda x: x["name"] == "lcase", result)) assert the_func == { - "name": "abs", - "description": "Returns the absolute value of a number", + "name": "lcase", + "description": "Synonym for LOWER()", "function_type": "scalar", "param_names": None, - "param_types": "int", - "return_type": "int", + "param_types": "Utf8", + "return_type": "Utf8", } config.set_remote_function_list_path(None) From 3f4f992d27804b3599a631b500781da4238390c7 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 16:08:31 +0800 Subject: [PATCH 16/24] fix trino function --- .../tests/routers/v3/connector/trino/test_functions.py | 10 +++++----- wren-core/core/src/logical_plan/utils.rs | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/trino/test_functions.py b/ibis-server/tests/routers/v3/connector/trino/test_functions.py index 530e62ec3..4de756e6c 100644 --- a/ibis-server/tests/routers/v3/connector/trino/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/trino/test_functions.py @@ -56,14 +56,14 @@ async def test_function_list(client): assert response.status_code == 200 result = response.json() assert len(result) == DATAFUSION_FUNCTION_COUNT + 9 - the_func = next(filter(lambda x: x["name"] == "array_distinct", result)) + the_func = next(filter(lambda x: x["name"] == "to_base64", result)) assert the_func == { - "name": "array_distinct", - "description": "Removes duplicate values from array", + "name": "to_base64", + "description": "Converts binary to base64", "function_type": "scalar", "param_names": None, - "param_types": "array", - "return_type": "array", + "param_types": "Binary", + "return_type": "Utf8", } config.set_remote_function_list_path(None) diff --git a/wren-core/core/src/logical_plan/utils.rs b/wren-core/core/src/logical_plan/utils.rs index 1a43035e1..bf8a0c608 100644 --- a/wren-core/core/src/logical_plan/utils.rs +++ b/wren-core/core/src/logical_plan/utils.rs @@ -158,6 +158,8 @@ pub fn map_data_type(data_type: &str) -> Result { "int64" => DataType::Int64, "time" => DataType::Time32(TimeUnit::Nanosecond), // chose the smallest time unit "null" => DataType::Null, + // Trino Compatible Types + "varbinary" => DataType::Binary, _ => { debug!("try parse by arrow {}", lower_data_type); // the from_str is case sensitive, so we need to use the original string From e3ca1b48dece8dedbd32138fd8ce95b47ce89238 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 17:21:51 +0800 Subject: [PATCH 17/24] fix canner and snowflake test --- .../resources/function_list/canner.csv | 64 +++++++++---------- .../resources/function_list/snowflake.csv | 9 +-- .../v3/connector/canner/test_functions.py | 10 +-- .../v3/connector/snowflake/test_functions.py | 20 +++--- 4 files changed, 47 insertions(+), 56 deletions(-) diff --git a/ibis-server/resources/function_list/canner.csv b/ibis-server/resources/function_list/canner.csv index b5473e55e..2109b3609 100644 --- a/ibis-server/resources/function_list/canner.csv +++ b/ibis-server/resources/function_list/canner.csv @@ -1,33 +1,31 @@ -function_type,name,return_type,description -aggregate,approx_percentile,same as input,"Approximates percentile" -aggregate,approximate_distinct,bigint,"Approximates count of distinct values" -aggregate,map_agg,map,"Aggregates key/value pairs into map" -aggregate,regr_slope,double,"Returns linear regression slope" -scalar,date_add,date,"Adds interval to date" -scalar,date_sub,date,"Subtracts interval from date" -scalar,filter,same_as_input,"Filters array using lambda" -scalar,format_datetime,varchar,"Formats datetime according to format string" -scalar,from_base64,varbinary,"Converts base64 to binary" -scalar,from_hex,varbinary,"Converts hex string to binary" -scalar,hamming_distance,bigint,"Calculates Hamming distance" -scalar,is_finite,boolean,"Tests if value is finite" -scalar,is_infinite,boolean,"Tests if value is infinite" -scalar,is_nan,boolean,"Tests if value is NaN" -scalar,json_extract,json,"Extracts JSON by JSONPath" -scalar,json_format,varchar,"Pretty prints JSON" -scalar,json_parse,json,"Parses string as JSON" -scalar,levenshtein_distance,bigint,"Calculates Levenshtein distance" -scalar,map_concat,map,"Concatenates two maps" -scalar,parse_datetime,timestamp,"Parses string to datetime using format" -scalar,reduce,same_as_input,"Reduces array to single value using lambda" -scalar,regexp_extract,varchar,"Extracts substring using regex" -scalar,split,array,"Splits string by delimiter into array" -scalar,to_base64,varchar,"Converts binary to base64" -scalar,transform,same_as_input,"Applies lambda to each element" -scalar,try,same_as_input,"Returns null if evaluation fails" -scalar,url_decode,varchar,"Decodes URL encoded string" -scalar,url_encode,varchar,"URL encodes string" -scalar,word_stem,varchar,"Returns word stem (English only)" -scalar,xxhash64,bigint,"Computes xxHash64 hash" -scalar,zip_with,array,"Combines two arrays using lambda" -window,lead,any,"Returns value from following row" +function_type,name,return_type,param_names,param_types,description +aggregate,approx_percentile,same_as_input,,,"Approximates percentile" +aggregate,approximate_distinct,bigint,,,"Approximates count of distinct values" +aggregate,regr_slope,double,,,"Returns linear regression slope" +scalar,date_add,date,,,"Adds interval to date" +scalar,date_sub,date,,,"Subtracts interval from date" +scalar,filter,same_as_input,,,"Filters array using lambda" +scalar,format_datetime,varchar,,,"Formats datetime according to format string" +scalar,from_base64,varbinary,,,"Converts base64 to binary" +scalar,from_hex,varbinary,,,"Converts hex string to binary" +scalar,hamming_distance,bigint,,,"Calculates Hamming distance" +scalar,is_finite,boolean,,,"Tests if value is finite" +scalar,is_infinite,boolean,,,"Tests if value is infinite" +scalar,is_nan,boolean,,,"Tests if value is NaN" +scalar,json_extract,json,,,"Extracts JSON by JSONPath" +scalar,json_format,varchar,,,"Pretty prints JSON" +scalar,json_parse,json,,,"Parses string as JSON" +scalar,levenshtein_distance,bigint,,,"Calculates Levenshtein distance" +scalar,parse_datetime,timestamp,,,"Parses string to datetime using format" +scalar,reduce,same_as_input,,,"Reduces array to single value using lambda" +scalar,regexp_extract,varchar,,,"Extracts substring using regex" +scalar,split,array,,,"Splits string by delimiter into array" +scalar,to_base64,varchar,,,"Converts binary to base64" +scalar,transform,same_as_input,,,"Applies lambda to each element" +scalar,try,same_as_input,,,"Returns null if evaluation fails" +scalar,url_decode,varchar,,,"Decodes URL encoded string" +scalar,url_encode,varchar,,,"URL encodes string" +scalar,word_stem,varchar,,,"Returns word stem (English only)" +scalar,xxhash64,bigint,,,"Computes xxHash64 hash" +scalar,zip_with,array,,,"Combines two arrays using lambda" +window,lead,any,,,"Returns value from following row" diff --git a/ibis-server/resources/function_list/snowflake.csv b/ibis-server/resources/function_list/snowflake.csv index c62bcf079..338c72fc4 100644 --- a/ibis-server/resources/function_list/snowflake.csv +++ b/ibis-server/resources/function_list/snowflake.csv @@ -4,7 +4,6 @@ aggregate,approx_count_distinct,numeric,"Approximates count of distinct values" aggregate,approx_percentile,numeric,"Approximates percentile of expression" aggregate,listagg,string,"Concatenates values with delimiter" aggregate,mode,same_as_input,"Returns most frequent value" -aggregate,object_agg,object,"Aggregates key/value pairs into object" aggregate,variance,numeric,"Returns variance" scalar,add_months,date,"Adds months to date" scalar,array_compact,array,"Removes null values from array" @@ -22,7 +21,6 @@ scalar,dayofweek,numeric,"Returns day of week (0-6)" scalar,dayofyear,numeric,"Returns day of year (1-366)" scalar,div0,numeric,"Divides with zero handling" scalar,extract,numeric,"Extracts part from date/time" -scalar,get_path,variant,"Returns value from path in variant" scalar,hash,numeric,"Returns hash value" scalar,hex_encode,string,"Encodes binary to hex" scalar,hex_decode,binary,"Decodes hex to binary" @@ -39,9 +37,6 @@ scalar,last_day,date,"Returns last day of month" scalar,len,numeric,"Returns length of string" scalar,months_between,numeric,"Returns months between dates" scalar,monthname,string,"Returns name of month" -scalar,object_construct,object,"Creates object from key-value pairs" -scalar,object_delete,object,"Deletes key from object" -scalar,object_insert,object,"Inserts key-value into object" scalar,parse_json,string,"Parses JSON string" scalar,parse_url,string,"Parses URL components" scalar,parse_xml,string,"Parses XML string" @@ -55,11 +50,9 @@ scalar,timeadd,timestamp,"Adds interval to timestamp" scalar,to_array,array,"Converts value to array" scalar,to_json,string,"Converts to JSON string" scalar,to_number,numeric,"Converts to number" -scalar,to_object,object,"Converts to object" scalar,to_time,time,"Converts to time" -scalar,to_variant,variant,"Converts to variant" scalar,to_xml,string,"Converts to XML string" -scalar,try_cast,any,"Safe type conversion" +scalar,try_cast,same_as_input,"Safe type conversion" scalar,try_to_date,date,"Safe date conversion" scalar,try_to_number,numeric,"Safe number conversion" scalar,try_to_timestamp,timestamp,"Safe timestamp conversion" diff --git a/ibis-server/tests/routers/v3/connector/canner/test_functions.py b/ibis-server/tests/routers/v3/connector/canner/test_functions.py index 16c4fa956..16bf2a76b 100644 --- a/ibis-server/tests/routers/v3/connector/canner/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/canner/test_functions.py @@ -53,15 +53,15 @@ async def test_function_list(client): response = await client.get(url=f"{base_url}/functions") assert response.status_code == 200 result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT + 30 - the_func = next(filter(lambda x: x["name"] == "abs", result)) + assert len(result) == DATAFUSION_FUNCTION_COUNT + 28 + the_func = next(filter(lambda x: x["name"] == "to_base64", result)) assert the_func == { - "name": "abs", - "description": "Returns absolute value of the argument", + "name": "to_base64", + "description": "Converts binary to base64", "function_type": "scalar", "param_names": None, "param_types": None, - "return_type": "double", + "return_type": None, } config.set_remote_function_list_path(None) diff --git a/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py b/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py index a40e6c3d7..6fb80f61e 100644 --- a/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py @@ -43,25 +43,25 @@ def set_remote_function_list_path(): async def test_function_list(client): config = get_config() - config.set_remote_function_list_path(None) - response = await client.get(url=f"{base_url}/functions") - assert response.status_code == 200 - result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT + # config.set_remote_function_list_path(None) + # response = await client.get(url=f"{base_url}/functions") + # assert response.status_code == 200 + # result = response.json() + # assert len(result) == DATAFUSION_FUNCTION_COUNT config.set_remote_function_list_path(function_list_path) response = await client.get(url=f"{base_url}/functions") assert response.status_code == 200 result = response.json() - assert len(result) == DATAFUSION_FUNCTION_COUNT + 70 - the_func = next(filter(lambda x: x["name"] == "abs", result)) + # assert len(result) == DATAFUSION_FUNCTION_COUNT + 60 + the_func = next(filter(lambda x: x["name"] == "is_null_value", result)) assert the_func == { - "name": "abs", - "description": "Returns absolute value", + "name": "is_null_value", + "description": "Tests if variant is SQL NULL", "function_type": "scalar", "param_names": None, "param_types": None, - "return_type": "number", + "return_type": None, } config.set_remote_function_list_path(None) From 67deafc9845ef5218f2b6a42ee37164cea418d02 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 17:23:18 +0800 Subject: [PATCH 18/24] fmt --- .../routers/v3/connector/bigquery/test_functions.py | 10 +++++++--- .../routers/v3/connector/clickhouse/test_functions.py | 2 -- .../tests/routers/v3/connector/mssql/test_functions.py | 2 -- .../tests/routers/v3/connector/mysql/test_functions.py | 2 -- .../routers/v3/connector/postgres/test_functions.py | 2 -- .../tests/routers/v3/connector/trino/test_functions.py | 2 -- 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py index 547f25c6c..320c6dcf9 100644 --- a/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/bigquery/test_functions.py @@ -1,5 +1,4 @@ import base64 -import os import orjson import pytest @@ -7,7 +6,6 @@ from app.config import get_config from tests.conftest import DATAFUSION_FUNCTION_COUNT from tests.routers.v3.connector.bigquery.conftest import base_url, function_list_path -from tests.util import FunctionCsvParser, SqlTestGenerator pytestmark = pytest.mark.functions @@ -48,7 +46,13 @@ async def test_function_list(client): assert response.status_code == 200 result = response.json() assert len(result) == DATAFUSION_FUNCTION_COUNT + 34 - the_func = next(filter(lambda x: x["name"] == "string_agg" and x['param_types'] == "LargeUtf8,LargeUtf8", result)) + the_func = next( + filter( + lambda x: x["name"] == "string_agg" + and x["param_types"] == "LargeUtf8,LargeUtf8", + result, + ) + ) assert the_func == { "name": "string_agg", "description": "Concatenates the values of string expressions and places separator values between them.", diff --git a/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py b/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py index 6c5fd009f..9d5c1a87e 100644 --- a/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/clickhouse/test_functions.py @@ -1,5 +1,4 @@ import base64 -import os import orjson import pytest @@ -7,7 +6,6 @@ from app.config import get_config from tests.conftest import DATAFUSION_FUNCTION_COUNT, file_path from tests.routers.v3.connector.clickhouse.conftest import base_url -from tests.util import FunctionCsvParser, SqlTestGenerator manifest = { "catalog": "my_catalog", diff --git a/ibis-server/tests/routers/v3/connector/mssql/test_functions.py b/ibis-server/tests/routers/v3/connector/mssql/test_functions.py index adf86762c..ec643ec07 100644 --- a/ibis-server/tests/routers/v3/connector/mssql/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/mssql/test_functions.py @@ -1,5 +1,4 @@ import base64 -import os import orjson import pytest @@ -7,7 +6,6 @@ from app.config import get_config from tests.conftest import DATAFUSION_FUNCTION_COUNT, file_path from tests.routers.v3.connector.mssql.conftest import base_url -from tests.util import FunctionCsvParser, SqlTestGenerator manifest = { "catalog": "my_catalog", diff --git a/ibis-server/tests/routers/v3/connector/mysql/test_functions.py b/ibis-server/tests/routers/v3/connector/mysql/test_functions.py index 4df5ce56c..c4c36b67f 100644 --- a/ibis-server/tests/routers/v3/connector/mysql/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/mysql/test_functions.py @@ -1,5 +1,4 @@ import base64 -import os import orjson import pytest @@ -7,7 +6,6 @@ from app.config import get_config from tests.conftest import DATAFUSION_FUNCTION_COUNT, file_path from tests.routers.v3.connector.mysql.conftest import base_url -from tests.util import FunctionCsvParser, SqlTestGenerator manifest = { "dataSource": "mysql", diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py index 198753ae5..7c42f3c0e 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_functions.py @@ -1,5 +1,4 @@ import base64 -import os import orjson import pytest @@ -7,7 +6,6 @@ from app.config import get_config from tests.conftest import DATAFUSION_FUNCTION_COUNT, file_path from tests.routers.v3.connector.postgres.conftest import base_url -from tests.util import FunctionCsvParser, SqlTestGenerator pytestmark = pytest.mark.functions diff --git a/ibis-server/tests/routers/v3/connector/trino/test_functions.py b/ibis-server/tests/routers/v3/connector/trino/test_functions.py index 4de756e6c..c7140be20 100644 --- a/ibis-server/tests/routers/v3/connector/trino/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/trino/test_functions.py @@ -1,5 +1,4 @@ import base64 -import os import orjson import pytest @@ -7,7 +6,6 @@ from app.config import get_config from tests.conftest import DATAFUSION_FUNCTION_COUNT, file_path from tests.routers.v3.connector.trino.conftest import base_url -from tests.util import FunctionCsvParser, SqlTestGenerator manifest = { "catalog": "my_catalog", From f1931d057083b84887d8d1b915c5ea585ceb379e Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 17:39:04 +0800 Subject: [PATCH 19/24] add binary type mapping --- wren-core/core/src/logical_plan/utils.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/wren-core/core/src/logical_plan/utils.rs b/wren-core/core/src/logical_plan/utils.rs index bf8a0c608..c217f4473 100644 --- a/wren-core/core/src/logical_plan/utils.rs +++ b/wren-core/core/src/logical_plan/utils.rs @@ -154,6 +154,7 @@ pub fn map_data_type(data_type: &str) -> Result { // BigQuery Compatible Types "bignumeric" => DataType::Decimal128(38, 10), // set the default precision and scale "bytes" => DataType::Binary, + "binary" => DataType::Binary, "float64" => DataType::Float64, "int64" => DataType::Int64, "time" => DataType::Time32(TimeUnit::Nanosecond), // chose the smallest time unit From 6c6b94d7ca2d5d4728879512e02fa9c06bb05be3 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 17:57:07 +0800 Subject: [PATCH 20/24] add test for missing param case --- wren-core-py/Cargo.lock | 537 +++++++++++------------ wren-core-py/tests/functions.csv | 1 + wren-core-py/tests/test_modeling_core.py | 31 +- 3 files changed, 282 insertions(+), 287 deletions(-) diff --git a/wren-core-py/Cargo.lock b/wren-core-py/Cargo.lock index 0fd615103..7cc53faa8 100644 --- a/wren-core-py/Cargo.lock +++ b/wren-core-py/Cargo.lock @@ -28,7 +28,7 @@ dependencies = [ "getrandom 0.2.15", "once_cell", "version_check", - "zerocopy", + "zerocopy 0.7.35", ] [[package]] @@ -140,9 +140,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1" +checksum = "84ef243634a39fb6e9d1710737e7a5ef96c9bacabd2326859ff889bc9ef755e5" dependencies = [ "arrow-arith", "arrow-array", @@ -161,9 +161,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a" +checksum = "8f420c6aef51dad2e4a96ce29c0ec90ad84880bdb60b321c74c652a6be07b93f" dependencies = [ "arrow-array", "arrow-buffer", @@ -175,9 +175,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5" +checksum = "24bda5ff6461a4ff9739959b3d57b377f45e3f878f7be1a4f28137c0a8f339fa" dependencies = [ "ahash", "arrow-buffer", @@ -192,9 +192,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e899dade2c3b7f5642eb8366cfd898958bcca099cde6dfea543c7e8d3ad88d4" +checksum = "bc6ed265c73f134a583d02c3cab5e16afab9446d8048ede8707e31f85fad58a0" dependencies = [ "bytes", "half", @@ -203,9 +203,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a" +checksum = "01c648572391edcef10e5fd458db70ba27ed6f71bcaee04397d0cfb100b34f8b" dependencies = [ "arrow-array", "arrow-buffer", @@ -224,9 +224,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65" +checksum = "a02fb265a6d8011a7d3ad1a36f25816ad0a3bb04cb8e9fe7929c165b98c0cbcd" dependencies = [ "arrow-array", "arrow-cast", @@ -240,9 +240,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a329fb064477c9ec5f0870d2f5130966f91055c7c5bce2b3a084f116bc28c3b" +checksum = "5f2cebf504bb6a92a134a87fff98f01b14fbb3a93ecf7aef90cd0f888c5fffa4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -252,9 +252,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86" +checksum = "8e6405b287671c88846e7751f7291f717b164911474cabac6d3d8614d5aa7374" dependencies = [ "arrow-array", "arrow-buffer", @@ -266,9 +266,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd" +checksum = "5329bf9e7390cbb6b117ddd4d82e94c5362ea4cab5095697139429f36a38350c" dependencies = [ "arrow-array", "arrow-buffer", @@ -278,18 +278,19 @@ dependencies = [ "chrono", "half", "indexmap 2.8.0", - "indexmap 2.8.0", "lexical-core", + "memchr", "num", "serde", "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160" +checksum = "e103c13d4b80da28339c1d7aa23dd85bd59f42158acc45d39eeb6770627909ce" dependencies = [ "arrow-array", "arrow-buffer", @@ -300,9 +301,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd" +checksum = "170549a11b8534f3097a0619cfe89c42812345dc998bcf81128fc700b84345b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -313,15 +314,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85934a9d0261e0fa5d4e2a5295107d743b543a6e0484a835d4b8db2da15306f9" +checksum = "a5c53775bba63f319189f366d2b86e9a8889373eb198f07d8544938fc9f8ed9a" [[package]] name = "arrow-select" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c" +checksum = "0a99003b2eb562b8d9c99dfb672306f15e94b20d3734179d596895703e821dcf" dependencies = [ "ahash", "arrow-array", @@ -333,9 +334,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458" +checksum = "90fdb130ee8325f4cd8262e19bb6baa3cbcef2b2573c4bee8c6fda7ea08199d7" dependencies = [ "arrow-array", "arrow-buffer", @@ -351,12 +352,9 @@ dependencies = [ [[package]] name = "async-compression" version = "0.4.19" -version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2", "bzip2", "flate2", "futures-core", @@ -371,10 +369,8 @@ dependencies = [ [[package]] name = "async-trait" version = "0.1.88" -version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", @@ -438,9 +434,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "blake2" @@ -454,10 +450,8 @@ dependencies = [ [[package]] name = "blake3" version = "1.7.0" -version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" -checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" dependencies = [ "arrayref", "arrayvec", @@ -535,9 +529,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.11" +version = "1.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" +checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" dependencies = [ "jobserver", "libc", @@ -552,22 +546,22 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets", + "windows-link", ] [[package]] name = "chrono-tz" -version = "0.10.1" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" +checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" dependencies = [ "chrono", "chrono-tz-build", @@ -576,9 +570,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" dependencies = [ "parse-zoneinfo", "phf_codegen", @@ -592,12 +586,11 @@ checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "comfy-table" -version = "7.1.3" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ - "strum", - "strum_macros", + "unicode-segmentation", "unicode-width", ] @@ -687,9 +680,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] @@ -746,8 +739,7 @@ dependencies = [ [[package]] name = "datafusion" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "arrow-ipc", @@ -755,7 +747,6 @@ dependencies = [ "async-trait", "bytes", "bzip2", - "bzip2", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -798,8 +789,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "async-trait", @@ -818,8 +808,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "async-trait", @@ -840,8 +829,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "ahash", "arrow", @@ -850,7 +838,6 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.8.0", - "indexmap 2.8.0", "libc", "log", "object_store", @@ -865,8 +852,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "log", "tokio", @@ -875,15 +861,13 @@ dependencies = [ [[package]] name = "datafusion-datasource" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", "bzip2", - "bzip2", "chrono", "datafusion-catalog", "datafusion-common", @@ -910,14 +894,12 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" [[package]] name = "datafusion-execution" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "dashmap", @@ -935,8 +917,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "chrono", @@ -947,7 +928,6 @@ dependencies = [ "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap 2.8.0", - "indexmap 2.8.0", "paste", "recursive", "serde_json", @@ -957,13 +937,11 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "datafusion-common", "indexmap 2.8.0", - "indexmap 2.8.0", "itertools 0.14.0", "paste", ] @@ -971,8 +949,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "arrow-buffer", @@ -1000,8 +977,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "ahash", "arrow", @@ -1021,8 +997,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "ahash", "arrow", @@ -1034,8 +1009,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "arrow-ord", @@ -1055,8 +1029,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "async-trait", @@ -1071,8 +1044,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1088,8 +1060,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1098,8 +1069,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "datafusion-expr", "quote", @@ -1109,8 +1079,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "chrono", @@ -1118,7 +1087,6 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "indexmap 2.8.0", - "indexmap 2.8.0", "itertools 0.14.0", "log", "recursive", @@ -1129,8 +1097,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "ahash", "arrow", @@ -1142,7 +1109,6 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.8.0", - "indexmap 2.8.0", "itertools 0.14.0", "log", "paste", @@ -1152,8 +1118,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "ahash", "arrow", @@ -1166,8 +1131,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "datafusion-common", @@ -1185,8 +1149,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "ahash", "arrow", @@ -1205,7 +1168,6 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.8.0", - "indexmap 2.8.0", "itertools 0.14.0", "log", "parking_lot", @@ -1216,15 +1178,13 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "46.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" +source = "git+https://github.com/Canner/datafusion.git?branch=v46.0.1#80009c4c820c1e6347a66e5cf3232150bd5bc0a2" dependencies = [ "arrow", "bigdecimal", "datafusion-common", "datafusion-expr", "indexmap 2.8.0", - "indexmap 2.8.0", "log", "recursive", "regex", @@ -1233,9 +1193,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.11" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058" dependencies = [ "powerfmt", "serde", @@ -1265,9 +1225,9 @@ dependencies = [ [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "env_filter" @@ -1281,22 +1241,22 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" +checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697" dependencies = [ "anstream", "anstyle", "env_filter", - "humantime", + "jiff", "log", ] [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" @@ -1473,14 +1433,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -1498,10 +1458,8 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "half" version = "2.5.0" -version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" -checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" dependencies = [ "cfg-if", "crunchy", @@ -1544,20 +1502,21 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b2fd658b06e56721792c5df4475705b6cda790e9298d19d2f8af083457bcd127" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -1612,9 +1571,9 @@ dependencies = [ [[package]] name = "icu_locid_transform_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" +checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d" [[package]] name = "icu_normalizer" @@ -1636,9 +1595,9 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7" [[package]] name = "icu_properties" @@ -1657,9 +1616,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2" [[package]] name = "icu_provider" @@ -1730,10 +1689,8 @@ dependencies = [ [[package]] name = "indexmap" version = "2.8.0" -version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" -checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -1742,9 +1699,9 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "integer-encoding" @@ -1778,9 +1735,33 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c102670231191d07d37a35af3eb77f1f0dbf7a71be51a962dcd57ea607be7260" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4cdde31a9d349f1b1f51a0b3714a5940ac022976f4b49485fc04be052b183b4c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "jobserver" @@ -1874,10 +1855,8 @@ dependencies = [ [[package]] name = "libc" version = "0.2.171" -version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libm" @@ -1887,15 +1866,15 @@ checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413" [[package]] name = "litemap" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" [[package]] name = "lock_api" @@ -1909,9 +1888,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lz4_flex" @@ -2079,9 +2058,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "c2806eaa3524762875e21c3dcd057bc4b7bfa01ce4da8d46be1cd43649e1cc6b" [[package]] name = "ordered-float" @@ -2117,9 +2096,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.2.1" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd" +checksum = "94243778210509a5a5e9e012872127180c155d73a9cd6e2df9243d213e81e100" dependencies = [ "ahash", "arrow-array", @@ -2149,7 +2128,6 @@ dependencies = [ "tokio", "twox-hash", "zstd", - "zstd-sys", ] [[package]] @@ -2181,7 +2159,6 @@ checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", "indexmap 2.8.0", - "indexmap 2.8.0", ] [[package]] @@ -2246,15 +2223,24 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] [[package]] name = "powerfmt" @@ -2264,45 +2250,45 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy", + "zerocopy 0.8.24", ] [[package]] name = "proc-macro-crate" -version = "3.2.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" dependencies = [ "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] [[package]] name = "psm" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" dependencies = [ "cc", ] [[package]] name = "pyo3" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57fe09249128b3173d092de9523eaa75136bf7ba85e0d69eca241c7939c933cc" +checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" dependencies = [ "cfg-if", "indoc", @@ -2318,9 +2304,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd3927b5a78757a0d71aa9dff669f903b1eb64b54142a9bd9f757f8fde65fd7" +checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" dependencies = [ "once_cell", "target-lexicon", @@ -2328,9 +2314,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dab6bb2102bd8f991e7749f130a70d05dd557613e39ed2deeee8e9ca0c4d548d" +checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" dependencies = [ "libc", "pyo3-build-config", @@ -2338,9 +2324,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91871864b353fd5ffcb3f91f2f703a22a9797c91b9ab497b1acac7b07ae509c7" +checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2350,9 +2336,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.23.4" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43abc3b80bc20f3facd86cd3c60beed58c3e2aa26213f3cda368de39c60a27e4" +checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" dependencies = [ "heck", "proc-macro2", @@ -2364,14 +2350,18 @@ dependencies = [ [[package]] name = "quote" version = "1.0.40" -version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ -2424,11 +2414,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.8" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -2513,11 +2503,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.44" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "e56a18552996ac8d29ecc3b190b4fdbb2d91ca4ec396de7bbffaf43f3d637e96" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "errno", "libc", "linux-raw-sys", @@ -2526,15 +2516,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -2553,30 +2543,30 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "semver" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -2585,9 +2575,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.138" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -2606,7 +2596,6 @@ dependencies = [ "hex", "indexmap 1.9.3", "indexmap 2.8.0", - "indexmap 2.8.0", "serde", "serde_derive", "serde_json", @@ -2666,9 +2655,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] name = "snafu" @@ -2727,9 +2716,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stacker" -version = "0.1.17" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +checksum = "601f9201feb9b09c00266478bf459952b9ef9a6b94edb2f21eba14ab681a60a9" dependencies = [ "cc", "cfg-if", @@ -2750,25 +2739,6 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - [[package]] name = "subtle" version = "2.6.1" @@ -2778,10 +2748,8 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" version = "2.0.100" -version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" -checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -2807,13 +2775,12 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.16.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" +checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" dependencies = [ - "cfg-if", "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.2", "once_cell", "rustix", "windows-sys", @@ -2821,18 +2788,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", @@ -2852,9 +2819,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.37" +version = "0.3.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" dependencies = [ "deranged", "itoa", @@ -2867,15 +2834,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" [[package]] name = "time-macros" -version = "0.2.19" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" dependencies = [ "num-conv", "time-core", @@ -2903,10 +2870,8 @@ dependencies = [ [[package]] name = "tokio" version = "1.44.1" -version = "1.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" -checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" dependencies = [ "backtrace", "bytes", @@ -2928,10 +2893,8 @@ dependencies = [ [[package]] name = "tokio-util" version = "0.7.14" -version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" -checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034" dependencies = [ "bytes", "futures-core", @@ -2948,11 +2911,10 @@ checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" -version = "0.22.23" +version = "0.22.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee" +checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" dependencies = [ - "indexmap 2.8.0", "indexmap 2.8.0", "toml_datetime", "winnow", @@ -3001,15 +2963,15 @@ dependencies = [ [[package]] name = "typenum" -version = "1.17.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "unicode-ident" -version = "1.0.16" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -3025,9 +2987,9 @@ checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "unindent" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" [[package]] name = "url" @@ -3061,12 +3023,10 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" version = "1.16.0" -version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" -checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ - "getrandom 0.3.1", + "getrandom 0.3.2", "js-sys", "wasm-bindgen", ] @@ -3095,9 +3055,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -3188,6 +3148,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-sys" version = "0.59.0" @@ -3263,20 +3229,20 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.0" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e49d2d35d3fad69b39b94139037ecfb4f359f08958b9c11e7315ce770462419" +checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36" dependencies = [ "memchr", ] [[package]] name = "wit-bindgen-rt" -version = "0.33.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -3388,8 +3354,16 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "byteorder", - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" +dependencies = [ + "zerocopy-derive 0.8.24", ] [[package]] @@ -3403,20 +3377,31 @@ dependencies = [ "syn", ] +[[package]] +name = "zerocopy-derive" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", @@ -3448,27 +3433,27 @@ dependencies = [ [[package]] name = "zstd" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.2.1" +version = "7.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.13+zstd.1.5.6" +version = "2.0.15+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" dependencies = [ "cc", "pkg-config", diff --git a/wren-core-py/tests/functions.csv b/wren-core-py/tests/functions.csv index a2691eb86..bc34b858b 100644 --- a/wren-core-py/tests/functions.csv +++ b/wren-core-py/tests/functions.csv @@ -5,3 +5,4 @@ scalar,json_extract,jsonb,"input,pattern","jsonb,text","Extracts a value from a scalar,add_custom,int,,"int,int","Adds two numbers together." scalar,test_same_as_input_array,same_as_input_first_array_element,,,"Returns the greatest value from the first array." scalar,test_return_type,same_as_input,,,"Returns the same type as the input." +scalar,test_without_param_type,string,,,"Without param type" diff --git a/wren-core-py/tests/test_modeling_core.py b/wren-core-py/tests/test_modeling_core.py index bdf62d098..c2ae0b260 100644 --- a/wren-core-py/tests/test_modeling_core.py +++ b/wren-core-py/tests/test_modeling_core.py @@ -106,7 +106,7 @@ def test_read_function_list(): path = "tests/functions.csv" session_context = SessionContext(manifest_str, path) functions = session_context.get_available_functions() - assert len(functions) == 25947 + assert len(functions) == 25948 rewritten_sql = session_context.transform_sql( "SELECT add_two(c_custkey, c_custkey) FROM my_catalog.my_schema.customer" @@ -135,34 +135,43 @@ def test_get_available_functions(): max_if = next(f for f in functions if f.name == "max_if") assert max_if.name == "max_if" assert max_if.function_type == "window" - assert max_if.param_names == None - assert max_if.param_types == None + assert max_if.param_names is None + assert max_if.param_types is None func = next(f for f in functions if f.name == "add_custom") assert func.name == "add_custom" assert func.function_type == "scalar" assert func.description == "Adds two numbers together." assert func.return_type == "Int32" - assert func.param_names == None + assert func.param_names is None assert func.param_types == "Int32,Int32" func = next(f for f in functions if f.name == "test_same_as_input_array") assert func.name == "test_same_as_input_array" assert func.function_type == "scalar" assert func.description == "Returns the greatest value from the first array." - assert func.return_type == None - assert func.param_names == None - assert func.param_types == None - + assert func.return_type is None + assert func.param_names is None + assert func.param_types is None func = next(f for f in functions if f.name == "test_return_type") assert func.name == "test_return_type" assert func.function_type == "scalar" assert func.description == "Returns the same type as the input." - assert func.return_type == None - assert func.param_names == None - assert func.param_types == None + assert func.return_type is None + assert func.param_names is None + assert func.param_types is None + func = next(f for f in functions if f.name == "test_without_param_type") + assert func.name == "test_without_param_type" + assert func.function_type == "scalar" + assert func.description == "Without param type" + # It's a string type actually. However, it misses the param type in the CSV. + # DataFusion builds the return type from the param type in information_schema. + # If lossing the param type, it will be None. + assert func.return_type is None + assert func.param_names is None + assert func.param_types is None @pytest.mark.parametrize( From d9a5484021f6d99b5d52ccd75c3c21fce3008381 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 18:03:58 +0800 Subject: [PATCH 21/24] fix fmt --- wren-core/core/src/mdl/function.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 54f2bcd34..77890c0c4 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -50,12 +50,7 @@ impl RemoteFunction { if types.iter().any(|x| x.is_none()) { return None; } - Some( - types - .into_iter() - .flatten() - .collect(), - ) + Some(types.into_iter().flatten().collect()) } } From 4b579aa954d6aab4dac426226846377ec1237135 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 18:06:13 +0800 Subject: [PATCH 22/24] fix negative test for struct --- wren-core/core/src/mdl/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-core/core/src/mdl/mod.rs b/wren-core/core/src/mdl/mod.rs index 94f1e3a95..332a51d12 100644 --- a/wren-core/core/src/mdl/mod.rs +++ b/wren-core/core/src/mdl/mod.rs @@ -1307,7 +1307,7 @@ mod test { .map_err(|e| { assert_eq!( e.to_string(), - "Error during planning: struct must have at least one field" + "Execution error: The expression to get an indexed field is only valid for `Struct`, `Map` or `Null` types, got Utf8" ) }); Ok(()) From a965395163ff906187e681dd0de12ddde4e23174 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 18:07:33 +0800 Subject: [PATCH 23/24] fix clippy --- wren-core/core/src/mdl/function.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wren-core/core/src/mdl/function.rs b/wren-core/core/src/mdl/function.rs index 77890c0c4..0364510b8 100644 --- a/wren-core/core/src/mdl/function.rs +++ b/wren-core/core/src/mdl/function.rs @@ -45,7 +45,7 @@ impl RemoteFunction { fn transform_param_type(param_types: &[Option]) -> Option> { let types = param_types .iter() - .map(|t| t.clone().map(|x| map_data_type(x.as_str()).ok()).flatten()) + .map(|t| t.clone().and_then(|x| map_data_type(x.as_str()).ok())) .collect::>(); if types.iter().any(|x| x.is_none()) { return None; From 7ac4ccb59f555ba5ff35a2a003bd939ca6a060ba Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 28 Mar 2025 18:37:48 +0800 Subject: [PATCH 24/24] re-enable the test for snowflake --- .../routers/v3/connector/snowflake/test_functions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py b/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py index 6fb80f61e..633410c91 100644 --- a/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py +++ b/ibis-server/tests/routers/v3/connector/snowflake/test_functions.py @@ -43,11 +43,11 @@ def set_remote_function_list_path(): async def test_function_list(client): config = get_config() - # config.set_remote_function_list_path(None) - # response = await client.get(url=f"{base_url}/functions") - # assert response.status_code == 200 - # result = response.json() - # assert len(result) == DATAFUSION_FUNCTION_COUNT + config.set_remote_function_list_path(None) + response = await client.get(url=f"{base_url}/functions") + assert response.status_code == 200 + result = response.json() + assert len(result) == DATAFUSION_FUNCTION_COUNT config.set_remote_function_list_path(function_list_path) response = await client.get(url=f"{base_url}/functions")