From 7fc781f4f8ee0c6faebdf59631a346a7a610dc33 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:13:47 -0500 Subject: [PATCH 01/18] feat: add cast_to_type UDF for type-based casting Add a `cast_to_type(expression, reference)` function that casts the first argument to the data type of the second argument, similar to DuckDB's cast_to_type. The second argument's type (not value) determines the target cast type, which is useful in macros and generic SQL where types need to be preserved dynamically. Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/functions/src/core/cast_to_type.rs | 147 ++++++++++++++++++ datafusion/functions/src/core/mod.rs | 7 + .../sqllogictest/test_files/cast_to_type.slt | 141 +++++++++++++++++ 3 files changed, 295 insertions(+) create mode 100644 datafusion/functions/src/core/cast_to_type.rs create mode 100644 datafusion/sqllogictest/test_files/cast_to_type.slt diff --git a/datafusion/functions/src/core/cast_to_type.rs b/datafusion/functions/src/core/cast_to_type.rs new file mode 100644 index 000000000000..22b91e470da5 --- /dev/null +++ b/datafusion/functions/src/core/cast_to_type.rs @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`CastToTypeFunc`]: Implementation of the `cast_to_type` + +use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::{ + Result, datatype::DataTypeExt, internal_err, utils::take_function_args, +}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; +use datafusion_expr::{ + Coercion, ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_macros::user_doc; + +/// Casts the first argument to the data type of the second argument. +/// +/// Only the type of the second argument is used; its value is ignored. +/// This is useful in macros or generic SQL where you need to preserve +/// or match types dynamically. +/// +/// For example: +/// ```sql +/// select cast_to_type('42', NULL::INTEGER); +/// ``` +#[user_doc( + doc_section(label = "Other Functions"), + description = "Casts the first argument to the data type of the second argument. Only the type of the second argument is used; its value is ignored.", + syntax_example = "cast_to_type(expression, reference)", + sql_example = r#"```sql +> select cast_to_type('42', NULL::INTEGER) as a; ++----+ +| a | ++----+ +| 42 | ++----+ + +> select cast_to_type(1 + 2, NULL::DOUBLE) as b; ++-----+ +| b | ++-----+ +| 3.0 | ++-----+ +```"#, + argument( + name = "expression", + description = "Expression to cast. The expression can be a constant, column, or function, and any combination of operators." + ), + argument( + name = "reference", + description = "Reference expression whose data type determines the target cast type. The value is ignored." 
+ ) +)] +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct CastToTypeFunc { + signature: Signature, +} + +impl Default for CastToTypeFunc { + fn default() -> Self { + Self::new() + } +} + +impl CastToTypeFunc { + pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Any), + Coercion::new_exact(TypeSignatureClass::Any), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for CastToTypeFunc { + fn name(&self) -> &str { + "cast_to_type" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be called instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); + let [_, reference_field] = take_function_args(self.name(), args.arg_fields)?; + let target_type = reference_field.data_type().clone(); + Ok(Field::new(self.name(), target_type, nullable).into()) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!("cast_to_type should have been simplified to cast") + } + + fn simplify( + &self, + mut args: Vec, + info: &SimplifyContext, + ) -> Result { + let [_, type_arg] = take_function_args(self.name(), &args)?; + let target_type = info.get_data_type(type_arg)?; + + // remove second (reference) argument + args.pop().unwrap(); + let arg = args.pop().unwrap(); + + let source_type = info.get_data_type(&arg)?; + let new_expr = if source_type == target_type { + // the argument's data type is already the correct type + arg + } else { + // Use an actual cast to get the correct type + Expr::Cast(datafusion_expr::Cast { + expr: Box::new(arg), + field: target_type.into_nullable_field_ref(), + }) + }; + Ok(ExprSimplifyResult::Simplified(new_expr)) + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} diff --git 
a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index e8737612a1dc..caf261535de7 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -24,6 +24,7 @@ pub mod arrow_cast; pub mod arrow_metadata; pub mod arrow_try_cast; pub mod arrowtypeof; +pub mod cast_to_type; pub mod coalesce; pub mod expr_ext; pub mod getfield; @@ -44,6 +45,7 @@ pub mod version; // create UDFs make_udf_function!(arrow_cast::ArrowCastFunc, arrow_cast); make_udf_function!(arrow_try_cast::ArrowTryCastFunc, arrow_try_cast); +make_udf_function!(cast_to_type::CastToTypeFunc, cast_to_type); make_udf_function!(nullif::NullIfFunc, nullif); make_udf_function!(nvl::NVLFunc, nvl); make_udf_function!(nvl2::NVL2Func, nvl2); @@ -75,6 +77,10 @@ pub mod expr_fn { arrow_try_cast, "Casts a value to a specific Arrow data type, returning NULL if the cast fails", arg1 arg2 + ),( + cast_to_type, + "Casts the first argument to the data type of the second argument", + arg1 arg2 ),( nvl, "Returns value2 if value1 is NULL; otherwise it returns value1", @@ -147,6 +153,7 @@ pub fn functions() -> Vec> { nullif(), arrow_cast(), arrow_try_cast(), + cast_to_type(), arrow_metadata(), nvl(), nvl2(), diff --git a/datafusion/sqllogictest/test_files/cast_to_type.slt b/datafusion/sqllogictest/test_files/cast_to_type.slt new file mode 100644 index 000000000000..cfaa0889bff3 --- /dev/null +++ b/datafusion/sqllogictest/test_files/cast_to_type.slt @@ -0,0 +1,141 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +####### +## Tests for cast_to_type function +####### + +# Basic string to integer cast +query I +SELECT cast_to_type('42', NULL::INTEGER); +---- +42 + +# String to double cast +query R +SELECT cast_to_type('3.14', NULL::DOUBLE); +---- +3.14 + +# Integer to string cast +query T +SELECT cast_to_type(42, NULL::VARCHAR); +---- +42 + +# Integer to double cast +query R +SELECT cast_to_type(42, NULL::DOUBLE); +---- +42 + +# Same-type is a no-op +query I +SELECT cast_to_type(42, 0::INTEGER); +---- +42 + +# NULL first argument +query I +SELECT cast_to_type(NULL, 0::INTEGER); +---- +NULL + +# NULL reference (type still applies) +query I +SELECT cast_to_type('42', NULL::INTEGER); +---- +42 + +# CASE expression as first argument +query I +SELECT cast_to_type(CASE WHEN true THEN '1' ELSE '2' END, NULL::INTEGER); +---- +1 + +# Arithmetic expression as first argument +query R +SELECT cast_to_type(1 + 2, NULL::DOUBLE); +---- +3 + +# Nested cast_to_type +query T +SELECT cast_to_type(cast_to_type('3.14', NULL::DOUBLE), NULL::VARCHAR); +---- +3.14 + +# Subquery as second argument +query I +SELECT cast_to_type('42', (SELECT NULL::INTEGER)); +---- +42 + +# Column reference as second argument +statement ok +CREATE TABLE t1 (int_col INTEGER, text_col VARCHAR, double_col DOUBLE); + +statement ok +INSERT INTO t1 VALUES (1, 'hello', 3.14), (2, 'world', 2.72); + +query I +SELECT cast_to_type('99', int_col) FROM t1 LIMIT 1; +---- +99 + +query T +SELECT cast_to_type(123, text_col) FROM t1 LIMIT 1; +---- +123 + +query R +SELECT cast_to_type('1.5', double_col) FROM t1 LIMIT 1; +---- 
+1.5 + +# Use with column values as first argument +query R +SELECT cast_to_type(int_col, NULL::DOUBLE) FROM t1; +---- +1 +2 + +# Cast column to match another column's type +query T +SELECT cast_to_type(int_col, text_col) FROM t1; +---- +1 +2 + +# Boolean cast +query B +SELECT cast_to_type(1, NULL::BOOLEAN); +---- +true + +# String to date cast +query D +SELECT cast_to_type('2024-01-15', NULL::DATE); +---- +2024-01-15 + +# Error on invalid cast +statement error +SELECT cast_to_type('not_a_number', NULL::INTEGER); + +statement ok +DROP TABLE t1; From 421be8ef4107ccb87f5897aa6c4d4efd695bd987 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:19:09 -0500 Subject: [PATCH 02/18] feat: add try_cast_to_type UDF (fallible variant) Add `try_cast_to_type(expression, reference)` which works like `cast_to_type` but returns NULL on cast failure instead of erroring, similar to the relationship between arrow_cast and arrow_try_cast. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/functions/src/core/mod.rs | 7 + .../functions/src/core/try_cast_to_type.rs | 135 ++++++++++++++++++ .../sqllogictest/test_files/cast_to_type.slt | 116 +++++++++++++++ 3 files changed, 258 insertions(+) create mode 100644 datafusion/functions/src/core/try_cast_to_type.rs diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index caf261535de7..d3c48573667c 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -38,6 +38,7 @@ pub mod nvl2; pub mod overlay; pub mod planner; pub mod r#struct; +pub mod try_cast_to_type; pub mod union_extract; pub mod union_tag; pub mod version; @@ -46,6 +47,7 @@ pub mod version; make_udf_function!(arrow_cast::ArrowCastFunc, arrow_cast); make_udf_function!(arrow_try_cast::ArrowTryCastFunc, arrow_try_cast); make_udf_function!(cast_to_type::CastToTypeFunc, cast_to_type); +make_udf_function!(try_cast_to_type::TryCastToTypeFunc, try_cast_to_type); make_udf_function!(nullif::NullIfFunc, nullif); make_udf_function!(nvl::NVLFunc, nvl); make_udf_function!(nvl2::NVL2Func, nvl2); @@ -81,6 +83,10 @@ pub mod expr_fn { cast_to_type, "Casts the first argument to the data type of the second argument", arg1 arg2 + ),( + try_cast_to_type, + "Casts the first argument to the data type of the second argument, returning NULL on failure", + arg1 arg2 ),( nvl, "Returns value2 if value1 is NULL; otherwise it returns value1", @@ -154,6 +160,7 @@ pub fn functions() -> Vec> { arrow_cast(), arrow_try_cast(), cast_to_type(), + try_cast_to_type(), arrow_metadata(), nvl(), nvl2(), diff --git a/datafusion/functions/src/core/try_cast_to_type.rs b/datafusion/functions/src/core/try_cast_to_type.rs new file mode 100644 index 000000000000..4eed4ab7ddd2 --- /dev/null +++ b/datafusion/functions/src/core/try_cast_to_type.rs @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license 
agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`TryCastToTypeFunc`]: Implementation of the `try_cast_to_type` + +use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::{ + Result, datatype::DataTypeExt, internal_err, utils::take_function_args, +}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; +use datafusion_expr::{ + Coercion, ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_macros::user_doc; + +/// Like [`cast_to_type`](super::cast_to_type::CastToTypeFunc) but returns NULL +/// on cast failure instead of erroring. +/// +/// This is implemented by simplifying `try_cast_to_type(expr, ref)` into +/// `Expr::TryCast` during optimization. +#[user_doc( + doc_section(label = "Other Functions"), + description = "Casts the first argument to the data type of the second argument, returning NULL if the cast fails. 
Only the type of the second argument is used; its value is ignored.", + syntax_example = "try_cast_to_type(expression, reference)", + sql_example = r#"```sql +> select try_cast_to_type('123', NULL::INTEGER) as a, + try_cast_to_type('not_a_number', NULL::INTEGER) as b; + ++-----+------+ +| a | b | ++-----+------+ +| 123 | NULL | ++-----+------+ +```"#, + argument( + name = "expression", + description = "Expression to cast. The expression can be a constant, column, or function, and any combination of operators." + ), + argument( + name = "reference", + description = "Reference expression whose data type determines the target cast type. The value is ignored." + ) +)] +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct TryCastToTypeFunc { + signature: Signature, +} + +impl Default for TryCastToTypeFunc { + fn default() -> Self { + Self::new() + } +} + +impl TryCastToTypeFunc { + pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Any), + Coercion::new_exact(TypeSignatureClass::Any), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for TryCastToTypeFunc { + fn name(&self) -> &str { + "try_cast_to_type" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + internal_err!("return_field_from_args should be called instead") + } + + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + // TryCast can always return NULL (on cast failure), so always nullable + let [_, reference_field] = take_function_args(self.name(), args.arg_fields)?; + let target_type = reference_field.data_type().clone(); + Ok(Field::new(self.name(), target_type, true).into()) + } + + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { + internal_err!("try_cast_to_type should have been simplified to try_cast") + } + + fn simplify( + &self, + mut args: Vec, + info: &SimplifyContext, + ) -> Result { + let [_, type_arg] = 
take_function_args(self.name(), &args)?; + let target_type = info.get_data_type(type_arg)?; + + // remove second (reference) argument + args.pop().unwrap(); + let arg = args.pop().unwrap(); + + let source_type = info.get_data_type(&arg)?; + let new_expr = if source_type == target_type { + arg + } else { + Expr::TryCast(datafusion_expr::TryCast { + expr: Box::new(arg), + field: target_type.into_nullable_field_ref(), + }) + }; + Ok(ExprSimplifyResult::Simplified(new_expr)) + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} diff --git a/datafusion/sqllogictest/test_files/cast_to_type.slt b/datafusion/sqllogictest/test_files/cast_to_type.slt index cfaa0889bff3..e48da7cb2d87 100644 --- a/datafusion/sqllogictest/test_files/cast_to_type.slt +++ b/datafusion/sqllogictest/test_files/cast_to_type.slt @@ -139,3 +139,119 @@ SELECT cast_to_type('not_a_number', NULL::INTEGER); statement ok DROP TABLE t1; + +####### +## Tests for try_cast_to_type function (fallible variant returning NULL) +####### + +# Basic string to integer cast +query I +SELECT try_cast_to_type('42', NULL::INTEGER); +---- +42 + +# Invalid cast returns NULL instead of error +query I +SELECT try_cast_to_type('not_a_number', NULL::INTEGER); +---- +NULL + +# String to double cast +query R +SELECT try_cast_to_type('3.14', NULL::DOUBLE); +---- +3.14 + +# Invalid double returns NULL +query R +SELECT try_cast_to_type('abc', NULL::DOUBLE); +---- +NULL + +# Integer to string cast (always succeeds) +query T +SELECT try_cast_to_type(42, NULL::VARCHAR); +---- +42 + +# Same-type is a no-op +query I +SELECT try_cast_to_type(42, 0::INTEGER); +---- +42 + +# NULL first argument +query I +SELECT try_cast_to_type(NULL, 0::INTEGER); +---- +NULL + +# CASE expression as first argument +query I +SELECT try_cast_to_type(CASE WHEN true THEN '1' ELSE '2' END, NULL::INTEGER); +---- +1 + +# Arithmetic expression as first argument +query R +SELECT try_cast_to_type(1 + 2, NULL::DOUBLE); +---- +3 + +# Nested: 
try_cast_to_type inside cast_to_type +query T +SELECT cast_to_type(try_cast_to_type('3.14', NULL::DOUBLE), NULL::VARCHAR); +---- +3.14 + +# Subquery as second argument +query I +SELECT try_cast_to_type('42', (SELECT NULL::INTEGER)); +---- +42 + +# Column reference as second argument +statement ok +CREATE TABLE t2 (int_col INTEGER, text_col VARCHAR); + +statement ok +INSERT INTO t2 VALUES (1, 'hello'), (2, 'world'); + +query I +SELECT try_cast_to_type('99', int_col) FROM t2 LIMIT 1; +---- +99 + +query I +SELECT try_cast_to_type(text_col, int_col) FROM t2; +---- +NULL +NULL + +# Cast column to match another column's type +query T +SELECT try_cast_to_type(int_col, text_col) FROM t2; +---- +1 +2 + +# Boolean cast +query B +SELECT try_cast_to_type(1, NULL::BOOLEAN); +---- +true + +# String to date - valid +query D +SELECT try_cast_to_type('2024-01-15', NULL::DATE); +---- +2024-01-15 + +# String to date - invalid returns NULL +query D +SELECT try_cast_to_type('not_a_date', NULL::DATE); +---- +NULL + +statement ok +DROP TABLE t2; From 7013469ccf8aadcf2666b03d18dff09a21cd1169 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:40:01 -0500 Subject: [PATCH 03/18] chore: regenerate scalar_functions.md docs Co-Authored-By: Claude Opus 4.6 (1M context) --- .../source/user-guide/sql/scalar_functions.md | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 022b0f9daec8..8d8fe0e2c816 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -5191,7 +5191,9 @@ union_tag(union_expression) - [arrow_metadata](#arrow_metadata) - [arrow_try_cast](#arrow_try_cast) - [arrow_typeof](#arrow_typeof) +- [cast_to_type](#cast_to_type) - [get_field](#get_field) +- [try_cast_to_type](#try_cast_to_type) - [version](#version) ### `arrow_cast` @@ -5311,6 
+5313,37 @@ arrow_typeof(expression) +---------------------------+------------------------+ ``` +### `cast_to_type` + +Casts the first argument to the data type of the second argument. Only the type of the second argument is used; its value is ignored. + +```sql +cast_to_type(expression, reference) +``` + +#### Arguments + +- **expression**: Expression to cast. The expression can be a constant, column, or function, and any combination of operators. +- **reference**: Reference expression whose data type determines the target cast type. The value is ignored. + +#### Example + +```sql +> select cast_to_type('42', NULL::INTEGER) as a; ++----+ +| a | ++----+ +| 42 | ++----+ + +> select cast_to_type(1 + 2, NULL::DOUBLE) as b; ++-----+ +| b | ++-----+ +| 3.0 | ++-----+ +``` + ### `get_field` Returns a field within a map or a struct with the given key. @@ -5363,6 +5396,32 @@ get_field(expression, field_name[, field_name2, ...]) +--------+ ``` +### `try_cast_to_type` + +Casts the first argument to the data type of the second argument, returning NULL if the cast fails. Only the type of the second argument is used; its value is ignored. + +```sql +try_cast_to_type(expression, reference) +``` + +#### Arguments + +- **expression**: Expression to cast. The expression can be a constant, column, or function, and any combination of operators. +- **reference**: Reference expression whose data type determines the target cast type. The value is ignored. + +#### Example + +```sql +> select try_cast_to_type('123', NULL::INTEGER) as a, + try_cast_to_type('not_a_number', NULL::INTEGER) as b; + ++-----+------+ +| a | b | ++-----+------+ +| 123 | NULL | ++-----+------+ +``` + ### `version` Returns the version of DataFusion. 
From 7891a1fd079a502b07d77f540cbb4356757b8360 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 11:32:22 -0500 Subject: [PATCH 04/18] fix: cast_to_type nullability should only depend on the first argument The second argument (reference) is used solely for its data type, so its nullability should not propagate to the result. Previously `cast_to_type(42, NULL::INTEGER)` was incorrectly marked nullable in the schema even though the input literal is non-null. Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/functions/src/core/cast_to_type.rs | 9 ++- .../sqllogictest/test_files/cast_to_type.slt | 59 +++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/core/cast_to_type.rs b/datafusion/functions/src/core/cast_to_type.rs index 22b91e470da5..3713f34df0bd 100644 --- a/datafusion/functions/src/core/cast_to_type.rs +++ b/datafusion/functions/src/core/cast_to_type.rs @@ -105,9 +105,14 @@ impl ScalarUDFImpl for CastToTypeFunc { } fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { - let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); - let [_, reference_field] = take_function_args(self.name(), args.arg_fields)?; + let [source_field, reference_field] = + take_function_args(self.name(), args.arg_fields)?; let target_type = reference_field.data_type().clone(); + // Nullability is inherited only from the first argument (the value + // being cast). The second argument is used solely for its type, so + // its own nullability is irrelevant. The one exception is when the + // target type is Null – that type is inherently nullable. 
+ let nullable = source_field.is_nullable() || target_type == DataType::Null; Ok(Field::new(self.name(), target_type, nullable).into()) } diff --git a/datafusion/sqllogictest/test_files/cast_to_type.slt b/datafusion/sqllogictest/test_files/cast_to_type.slt index e48da7cb2d87..6975582df81d 100644 --- a/datafusion/sqllogictest/test_files/cast_to_type.slt +++ b/datafusion/sqllogictest/test_files/cast_to_type.slt @@ -140,6 +140,46 @@ SELECT cast_to_type('not_a_number', NULL::INTEGER); statement ok DROP TABLE t1; +####### +## Nullability tests for cast_to_type +####### + +statement ok +set datafusion.catalog.information_schema = true; + +# Non-nullable input -> non-nullable output +statement ok +CREATE VIEW v_cast_nonnull AS SELECT cast_to_type(42, NULL::INTEGER) as a; + +query TTT +SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'v_cast_nonnull'; +---- +a Int32 NO + +statement ok +DROP VIEW v_cast_nonnull; + +# Nullable input -> nullable output +statement ok +CREATE TABLE t_nullable (x INTEGER); + +statement ok +INSERT INTO t_nullable VALUES (1), (NULL); + +statement ok +CREATE VIEW v_cast_null AS SELECT cast_to_type(x, NULL::DOUBLE) as a FROM t_nullable; + +query TTT +SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'v_cast_null'; +---- +a Float64 YES + +statement ok +DROP VIEW v_cast_null; + +statement ok +DROP TABLE t_nullable; + ####### ## Tests for try_cast_to_type function (fallible variant returning NULL) ####### @@ -255,3 +295,22 @@ NULL statement ok DROP TABLE t2; + +####### +## Nullability tests for try_cast_to_type +####### + +# try_cast_to_type is always nullable (cast can fail) +statement ok +CREATE VIEW v_trycast AS SELECT try_cast_to_type(42, NULL::INTEGER) as a; + +query TTT +SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'v_trycast'; +---- +a Int32 YES + +statement ok +DROP VIEW v_trycast; + +statement ok +set 
datafusion.catalog.information_schema = false; From 09147eac970570194ba1fe25fe976900e70da7ba Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:06:24 -0500 Subject: [PATCH 05/18] Update datafusion/functions/src/core/cast_to_type.rs Co-authored-by: Martin Grigorov --- datafusion/functions/src/core/cast_to_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/core/cast_to_type.rs b/datafusion/functions/src/core/cast_to_type.rs index 3713f34df0bd..f097ee7b352d 100644 --- a/datafusion/functions/src/core/cast_to_type.rs +++ b/datafusion/functions/src/core/cast_to_type.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`CastToTypeFunc`]: Implementation of the `cast_to_type` +//! [`CastToTypeFunc`]: Implementation of the `cast_to_type` function use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{ From 954e029dd2e443ff5e1b98f5c91bb61f6f2b3046 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:17:59 -0500 Subject: [PATCH 06/18] update tests --- .../sqllogictest/test_files/cast_to_type.slt | 53 ++++++++++++++----- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/datafusion/sqllogictest/test_files/cast_to_type.slt b/datafusion/sqllogictest/test_files/cast_to_type.slt index 6975582df81d..7fa5da8434a3 100644 --- a/datafusion/sqllogictest/test_files/cast_to_type.slt +++ b/datafusion/sqllogictest/test_files/cast_to_type.slt @@ -21,25 +21,25 @@ # Basic string to integer cast query I -SELECT cast_to_type('42', NULL::INTEGER); +SELECT cast_to_type('42', 1::INTEGER); ---- 42 # String to double cast query R -SELECT cast_to_type('3.14', NULL::DOUBLE); +SELECT cast_to_type('3.14', 1.0::DOUBLE); ---- 3.14 # Integer to string cast query T -SELECT cast_to_type(42, NULL::VARCHAR); +SELECT cast_to_type(42, 
'a'::VARCHAR); ---- 42 # Integer to double cast query R -SELECT cast_to_type(42, NULL::DOUBLE); +SELECT cast_to_type(42, 0.0::DOUBLE); ---- 42 @@ -49,18 +49,30 @@ SELECT cast_to_type(42, 0::INTEGER); ---- 42 -# NULL first argument -query I -SELECT cast_to_type(NULL, 0::INTEGER); +# Second argument is a typed NULL double +query R +SELECT cast_to_type('3.14', NULL::DOUBLE); ---- -NULL +3.14 -# NULL reference (type still applies) +# Second argument is a typed NULL integer query I -SELECT cast_to_type('42', NULL::INTEGER); +SELECT cast_to_type(42, NULL::INTEGER); ---- 42 +# Second argument is a typed NULL string +query T +SELECT cast_to_type('42', NULL::VARCHAR); +---- +42 + +# NULL first argument +query I +SELECT cast_to_type(NULL, 0::INTEGER); +---- +NULL + # CASE expression as first argument query I SELECT cast_to_type(CASE WHEN true THEN '1' ELSE '2' END, NULL::INTEGER); @@ -107,9 +119,15 @@ SELECT cast_to_type('1.5', double_col) FROM t1 LIMIT 1; ---- 1.5 +# Case statement as second argument +query I +SELECT cast_to_type('42', CASE WHEN random() < 2 THEN 1 ELSE 0 END); +---- +42 + # Use with column values as first argument query R -SELECT cast_to_type(int_col, NULL::DOUBLE) FROM t1; +SELECT cast_to_type(int_col, 1.0::DOUBLE) FROM t1; ---- 1 2 @@ -167,13 +185,22 @@ statement ok INSERT INTO t_nullable VALUES (1), (NULL); statement ok -CREATE VIEW v_cast_null AS SELECT cast_to_type(x, NULL::DOUBLE) as a FROM t_nullable; +CREATE VIEW v_cast_null AS SELECT cast_to_type(x, 1.0::DOUBLE) as a FROM t_nullable; query TTT SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'v_cast_null'; ---- a Float64 YES +# If we cast to the null type itself the result is nullable even if the input is not +statement ok +CREATE VIEW v_cast_to_null AS SELECT cast_to_type(42, null) as a; + +query TTT +SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'v_cast_to_null'; +---- +a Null YES + statement ok DROP 
VIEW v_cast_null; @@ -302,7 +329,7 @@ DROP TABLE t2; # try_cast_to_type is always nullable (cast can fail) statement ok -CREATE VIEW v_trycast AS SELECT try_cast_to_type(42, NULL::INTEGER) as a; +CREATE VIEW v_trycast AS SELECT try_cast_to_type(42, 1::INTEGER) as a; query TTT SELECT column_name, data_type, is_nullable FROM information_schema.columns WHERE table_name = 'v_trycast'; From 06b93b097273c5e095f1a35f520a489398780747 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:19:04 -0500 Subject: [PATCH 07/18] Update datafusion/functions/src/core/cast_to_type.rs Co-authored-by: Martin Grigorov --- datafusion/functions/src/core/cast_to_type.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/core/cast_to_type.rs b/datafusion/functions/src/core/cast_to_type.rs index f097ee7b352d..964e68329e64 100644 --- a/datafusion/functions/src/core/cast_to_type.rs +++ b/datafusion/functions/src/core/cast_to_type.rs @@ -137,10 +137,11 @@ impl ScalarUDFImpl for CastToTypeFunc { // the argument's data type is already the correct type arg } else { + let nullable = source_field.is_nullable() || target_type == DataType::Null; // Use an actual cast to get the correct type Expr::Cast(datafusion_expr::Cast { expr: Box::new(arg), - field: target_type.into_nullable_field_ref(), + field: Field::new("", target_type, nullable).into(), }) }; Ok(ExprSimplifyResult::Simplified(new_expr)) From 2a4bdd76b5f7b6082476a80c2c7019f742698021 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:19:13 -0500 Subject: [PATCH 08/18] Update docs/source/user-guide/sql/scalar_functions.md Co-authored-by: Martin Grigorov --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md 
index 8d8fe0e2c816..e695b915a662 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -5406,7 +5406,7 @@ try_cast_to_type(expression, reference) #### Arguments -- **expression**: Expression to cast. The expression can be a constant, column, or function, and any combination of operators. +- **expression**: Expression to cast. The expression can be a constant, a column, or a function, and any combination of operators. - **reference**: Reference expression whose data type determines the target cast type. The value is ignored. #### Example From be6aabf14307c2c8aac2da09ab95b2f3e2ab3e3a Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:19:22 -0500 Subject: [PATCH 09/18] Update docs/source/user-guide/sql/scalar_functions.md Co-authored-by: Martin Grigorov --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index e695b915a662..48a5de1c569e 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -5323,7 +5323,7 @@ cast_to_type(expression, reference) #### Arguments -- **expression**: Expression to cast. The expression can be a constant, column, or function, and any combination of operators. +- **expression**: Expression to cast. The expression can be a constant, a column, or a function, and any combination of operators. - **reference**: Reference expression whose data type determines the target cast type. The value is ignored. 
#### Example From bcc40d08abe249554c1ce4f9650fd66a91dcac43 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:19:38 -0500 Subject: [PATCH 10/18] Update datafusion/functions/src/core/try_cast_to_type.rs Co-authored-by: Martin Grigorov --- datafusion/functions/src/core/try_cast_to_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/core/try_cast_to_type.rs b/datafusion/functions/src/core/try_cast_to_type.rs index 4eed4ab7ddd2..c2bc86087a78 100644 --- a/datafusion/functions/src/core/try_cast_to_type.rs +++ b/datafusion/functions/src/core/try_cast_to_type.rs @@ -49,7 +49,7 @@ use datafusion_macros::user_doc; ```"#, argument( name = "expression", - description = "Expression to cast. The expression can be a constant, column, or function, and any combination of operators." + description = "The expression to cast. It can be a constant, column, or function, and any combination of operators." ), argument( name = "reference", From 1007af6448d3401be73302eb5b59a7283adb437d Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:19:48 -0500 Subject: [PATCH 11/18] Update datafusion/functions/src/core/cast_to_type.rs Co-authored-by: Martin Grigorov --- datafusion/functions/src/core/cast_to_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/core/cast_to_type.rs b/datafusion/functions/src/core/cast_to_type.rs index 964e68329e64..f8efaba21ef7 100644 --- a/datafusion/functions/src/core/cast_to_type.rs +++ b/datafusion/functions/src/core/cast_to_type.rs @@ -59,7 +59,7 @@ use datafusion_macros::user_doc; ```"#, argument( name = "expression", - description = "Expression to cast. The expression can be a constant, column, or function, and any combination of operators." + description = "The expression to cast. 
It can be a constant, column, or function, and any combination of operators." ), argument( name = "reference", From 8f896dcb432e1bf5aa84dba7b4889b3334d9fd8b Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:20:02 -0500 Subject: [PATCH 12/18] Update datafusion/functions/src/core/try_cast_to_type.rs Co-authored-by: Martin Grigorov --- datafusion/functions/src/core/try_cast_to_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/src/core/try_cast_to_type.rs b/datafusion/functions/src/core/try_cast_to_type.rs index c2bc86087a78..7eb3720a6472 100644 --- a/datafusion/functions/src/core/try_cast_to_type.rs +++ b/datafusion/functions/src/core/try_cast_to_type.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`TryCastToTypeFunc`]: Implementation of the `try_cast_to_type` +//! [`TryCastToTypeFunc`]: Implementation of the `try_cast_to_type` function use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{ From bcfd87f64838cb4a809521f3a45b2bd53213effb Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:33:09 -0500 Subject: [PATCH 13/18] get rid of `pop().unwrap()`s --- datafusion/functions/src/core/arrow_cast.rs | 13 +++++------ .../functions/src/core/arrow_try_cast.rs | 12 +++++----- datafusion/functions/src/core/cast_to_type.rs | 22 +++++++------------ .../functions/src/core/try_cast_to_type.rs | 16 +++++--------- 4 files changed, 24 insertions(+), 39 deletions(-) diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index b05296721655..e4eb617ab05b 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -154,23 +154,20 @@ impl ScalarUDFImpl for ArrowCastFunc { fn simplify( &self, - mut args: Vec, + args: Vec, 
info: &SimplifyContext, ) -> Result { // convert this into a real cast let target_type = data_type_from_args(self.name(), &args)?; - // remove second (type) argument - args.pop().unwrap(); - let arg = args.pop().unwrap(); - - let source_type = info.get_data_type(&arg)?; + let [source_arg, _type_arg] = take_function_args(self.name(), args)?; + let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { // the argument's data type is already the correct type - arg + source_arg } else { // Use an actual cast to get the correct type Expr::Cast(datafusion_expr::Cast { - expr: Box::new(arg), + expr: Box::new(source_arg), field: target_type.into_nullable_field_ref(), }) }; diff --git a/datafusion/functions/src/core/arrow_try_cast.rs b/datafusion/functions/src/core/arrow_try_cast.rs index 61a5291c05ed..e89903c84ae7 100644 --- a/datafusion/functions/src/core/arrow_try_cast.rs +++ b/datafusion/functions/src/core/arrow_try_cast.rs @@ -127,20 +127,18 @@ impl ScalarUDFImpl for ArrowTryCastFunc { fn simplify( &self, - mut args: Vec, + args: Vec, info: &SimplifyContext, ) -> Result { let target_type = data_type_from_args(self.name(), &args)?; - // remove second (type) argument - args.pop().unwrap(); - let arg = args.pop().unwrap(); + let [source_arg, _type_arg] = take_function_args(self.name(), args)?; - let source_type = info.get_data_type(&arg)?; + let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { - arg + source_arg } else { Expr::TryCast(datafusion_expr::TryCast { - expr: Box::new(arg), + expr: Box::new(source_arg), field: target_type.into_nullable_field_ref(), }) }; diff --git a/datafusion/functions/src/core/cast_to_type.rs b/datafusion/functions/src/core/cast_to_type.rs index f8efaba21ef7..cd1c93ed3153 100644 --- a/datafusion/functions/src/core/cast_to_type.rs +++ b/datafusion/functions/src/core/cast_to_type.rs @@ -18,9 +18,7 @@ //! 
[`CastToTypeFunc`]: Implementation of the `cast_to_type` function use arrow::datatypes::{DataType, Field, FieldRef}; -use datafusion_common::{ - Result, datatype::DataTypeExt, internal_err, utils::take_function_args, -}; +use datafusion_common::{Result, internal_err, utils::take_function_args}; use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, @@ -122,25 +120,21 @@ impl ScalarUDFImpl for CastToTypeFunc { fn simplify( &self, - mut args: Vec, + args: Vec, info: &SimplifyContext, ) -> Result { - let [_, type_arg] = take_function_args(self.name(), &args)?; - let target_type = info.get_data_type(type_arg)?; - - // remove second (reference) argument - args.pop().unwrap(); - let arg = args.pop().unwrap(); + let [source_arg, type_arg] = take_function_args(self.name(), args)?; + let target_type = info.get_data_type(&type_arg)?; - let source_type = info.get_data_type(&arg)?; + let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { // the argument's data type is already the correct type - arg + source_arg } else { - let nullable = source_field.is_nullable() || target_type == DataType::Null; + let nullable = info.nullable(&source_arg)? 
|| target_type == DataType::Null; // Use an actual cast to get the correct type Expr::Cast(datafusion_expr::Cast { - expr: Box::new(arg), + expr: Box::new(source_arg), field: Field::new("", target_type, nullable).into(), }) }; diff --git a/datafusion/functions/src/core/try_cast_to_type.rs b/datafusion/functions/src/core/try_cast_to_type.rs index 7eb3720a6472..cc2b6c66f356 100644 --- a/datafusion/functions/src/core/try_cast_to_type.rs +++ b/datafusion/functions/src/core/try_cast_to_type.rs @@ -107,22 +107,18 @@ impl ScalarUDFImpl for TryCastToTypeFunc { fn simplify( &self, - mut args: Vec, + args: Vec, info: &SimplifyContext, ) -> Result { - let [_, type_arg] = take_function_args(self.name(), &args)?; - let target_type = info.get_data_type(type_arg)?; + let [source_arg, type_arg] = take_function_args(self.name(), args)?; + let target_type = info.get_data_type(&type_arg)?; - // remove second (reference) argument - args.pop().unwrap(); - let arg = args.pop().unwrap(); - - let source_type = info.get_data_type(&arg)?; + let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { - arg + source_arg } else { Expr::TryCast(datafusion_expr::TryCast { - expr: Box::new(arg), + expr: Box::new(source_arg), field: target_type.into_nullable_field_ref(), }) }; From 0da8d3ea8824ca1cc49848b8fb4c2f0441adc5ea Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:36:01 -0500 Subject: [PATCH 14/18] clean up data_type_from_args() --- datafusion/functions/src/core/arrow_cast.rs | 10 ++++------ datafusion/functions/src/core/arrow_try_cast.rs | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index e4eb617ab05b..c15a0dc36a23 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -158,8 +158,8 @@ impl ScalarUDFImpl for 
ArrowCastFunc { info: &SimplifyContext, ) -> Result { // convert this into a real cast - let target_type = data_type_from_args(self.name(), &args)?; - let [source_arg, _type_arg] = take_function_args(self.name(), args)?; + let [source_arg, type_arg] = take_function_args(self.name(), args)?; + let target_type = data_type_from_args(self.name(), &type_arg)?; let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { // the argument's data type is already the correct type @@ -180,10 +180,8 @@ impl ScalarUDFImpl for ArrowCastFunc { } } -/// Returns the requested type from the arguments -pub(crate) fn data_type_from_args(name: &str, args: &[Expr]) -> Result { - let [_, type_arg] = take_function_args(name, args)?; - +/// Returns the requested type from the type argument +pub(crate) fn data_type_from_args(name: &str, type_arg: &Expr) -> Result { let Expr::Literal(ScalarValue::Utf8(Some(val)), _) = type_arg else { return exec_err!( "{name} requires its second argument to be a constant string, got {:?}", diff --git a/datafusion/functions/src/core/arrow_try_cast.rs b/datafusion/functions/src/core/arrow_try_cast.rs index e89903c84ae7..138a7418ae35 100644 --- a/datafusion/functions/src/core/arrow_try_cast.rs +++ b/datafusion/functions/src/core/arrow_try_cast.rs @@ -130,8 +130,8 @@ impl ScalarUDFImpl for ArrowTryCastFunc { args: Vec, info: &SimplifyContext, ) -> Result { - let target_type = data_type_from_args(self.name(), &args)?; - let [source_arg, _type_arg] = take_function_args(self.name(), args)?; + let [source_arg, type_arg] = take_function_args(self.name(), args)?; + let target_type = data_type_from_args(self.name(), &type_arg)?; let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { From be67a29beb2ea43e0c86ffdcc252471780f05b8c Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:36:44 -0500 Subject: [PATCH 15/18] 
rename data_type_from_args to data_type_from_type_arg --- datafusion/functions/src/core/arrow_cast.rs | 4 ++-- datafusion/functions/src/core/arrow_try_cast.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index c15a0dc36a23..0b67883c17c8 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -159,7 +159,7 @@ impl ScalarUDFImpl for ArrowCastFunc { ) -> Result { // convert this into a real cast let [source_arg, type_arg] = take_function_args(self.name(), args)?; - let target_type = data_type_from_args(self.name(), &type_arg)?; + let target_type = data_type_from_type_arg(self.name(), &type_arg)?; let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { // the argument's data type is already the correct type @@ -181,7 +181,7 @@ impl ScalarUDFImpl for ArrowCastFunc { } /// Returns the requested type from the type argument -pub(crate) fn data_type_from_args(name: &str, type_arg: &Expr) -> Result { +pub(crate) fn data_type_from_type_arg(name: &str, type_arg: &Expr) -> Result { let Expr::Literal(ScalarValue::Utf8(Some(val)), _) = type_arg else { return exec_err!( "{name} requires its second argument to be a constant string, got {:?}", diff --git a/datafusion/functions/src/core/arrow_try_cast.rs b/datafusion/functions/src/core/arrow_try_cast.rs index 138a7418ae35..d27b29ba5736 100644 --- a/datafusion/functions/src/core/arrow_try_cast.rs +++ b/datafusion/functions/src/core/arrow_try_cast.rs @@ -31,7 +31,7 @@ use datafusion_expr::{ }; use datafusion_macros::user_doc; -use super::arrow_cast::data_type_from_args; +use super::arrow_cast::data_type_from_type_arg; /// Like [`arrow_cast`](super::arrow_cast::ArrowCastFunc) but returns NULL on cast failure instead of erroring.
/// @@ -131,7 +131,7 @@ impl ScalarUDFImpl for ArrowTryCastFunc { info: &SimplifyContext, ) -> Result { let [source_arg, type_arg] = take_function_args(self.name(), args)?; - let target_type = data_type_from_args(self.name(), &type_arg)?; + let target_type = data_type_from_type_arg(self.name(), &type_arg)?; let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { From 38de43870cfb500c3c70a6aa153e9da50500677f Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:37:27 -0500 Subject: [PATCH 16/18] whitespace --- datafusion/functions/src/core/cast_to_type.rs | 1 - datafusion/functions/src/core/try_cast_to_type.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/datafusion/functions/src/core/cast_to_type.rs b/datafusion/functions/src/core/cast_to_type.rs index cd1c93ed3153..abc7d440e04b 100644 --- a/datafusion/functions/src/core/cast_to_type.rs +++ b/datafusion/functions/src/core/cast_to_type.rs @@ -125,7 +125,6 @@ impl ScalarUDFImpl for CastToTypeFunc { ) -> Result { let [source_arg, type_arg] = take_function_args(self.name(), args)?; let target_type = info.get_data_type(&type_arg)?; - let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { // the argument's data type is already the correct type diff --git a/datafusion/functions/src/core/try_cast_to_type.rs b/datafusion/functions/src/core/try_cast_to_type.rs index cc2b6c66f356..4c5af4cc6d22 100644 --- a/datafusion/functions/src/core/try_cast_to_type.rs +++ b/datafusion/functions/src/core/try_cast_to_type.rs @@ -112,7 +112,6 @@ impl ScalarUDFImpl for TryCastToTypeFunc { ) -> Result { let [source_arg, type_arg] = take_function_args(self.name(), args)?; let target_type = info.get_data_type(&type_arg)?; - let source_type = info.get_data_type(&source_arg)?; let new_expr = if source_type == target_type { source_arg From 717d51b779f7cbe348bad7fbdfe2b6daed5637c3 Mon Sep 
17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 17:59:39 -0500 Subject: [PATCH 17/18] sync scalar_functions.md argument descriptions with the UDF doc strings --- docs/source/user-guide/sql/scalar_functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 48a5de1c569e..f4e7e9d841c1 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -5323,7 +5323,7 @@ cast_to_type(expression, reference) #### Arguments -- **expression**: Expression to cast. The expression can be a constant, a column, or a function, and any combination of operators. +- **expression**: The expression to cast. It can be a constant, column, or function, and any combination of operators. - **reference**: Reference expression whose data type determines the target cast type. The value is ignored. #### Example @@ -5406,7 +5406,7 @@ try_cast_to_type(expression, reference) #### Arguments -- **expression**: Expression to cast. The expression can be a constant, a column, or a function, and any combination of operators. +- **expression**: The expression to cast. It can be a constant, column, or function, and any combination of operators. - **reference**: Reference expression whose data type determines the target cast type. The value is ignored.
#### Example From 08b7edbbb0922f3470b9015ef3cc17a7a85416d6 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 6 Apr 2026 09:38:31 -0500 Subject: [PATCH 18/18] Add error message regex and invalid type test for cast_to_type Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/sqllogictest/test_files/cast_to_type.slt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/cast_to_type.slt b/datafusion/sqllogictest/test_files/cast_to_type.slt index 7fa5da8434a3..128846c0f515 100644 --- a/datafusion/sqllogictest/test_files/cast_to_type.slt +++ b/datafusion/sqllogictest/test_files/cast_to_type.slt @@ -152,9 +152,13 @@ SELECT cast_to_type('2024-01-15', NULL::DATE); 2024-01-15 # Error on invalid cast -statement error +statement error Cannot cast string 'not_a_number' to value of Int32 type SELECT cast_to_type('not_a_number', NULL::INTEGER); +# Error on invalid target type +statement error Unsupported SQL type INVALID +SELECT cast_to_type('42', NULL::INVALID); + statement ok DROP TABLE t1;