From 6661b3c3b011cb6e972fda43b222e63201661937 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 18:17:41 -0500 Subject: [PATCH 1/5] fix: preserve source field metadata in TryCast expressions TryCast's `to_field` implementation fell through to a default case that created a new Field without metadata, unlike Cast which had a dedicated handler preserving the source field's metadata. This adds the same metadata-preserving logic for TryCast. Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/expr/src/expr_schema.rs | 12 +++++++++- .../sqllogictest/test_files/metadata.slt | 22 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 81d788b1a2302..85f5eda628c05 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -574,12 +574,22 @@ impl ExprSchemable for Expr { id: _, field: Some(field), }) => Ok(Arc::clone(field).renamed(&schema_name)), + Expr::TryCast(TryCast { expr, field }) => expr + .to_field(schema) + .map(|(_table_ref, destination_field)| { + destination_field + .as_ref() + .clone() + .with_data_type(field.data_type().clone()) + .with_nullable(true) + .with_metadata(destination_field.metadata().clone()) + }) + .map(Arc::new), Expr::Like(_) | Expr::SimilarTo(_) | Expr::Not(_) | Expr::Between(_) | Expr::Case(_) - | Expr::TryCast(_) | Expr::InList(_) | Expr::InSubquery(_) | Expr::SetComparison(_) diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index f3836b23ec321..109c9e4694b72 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -347,5 +347,27 @@ select arrow_metadata(id) from table_with_metadata limit 1; ---- {metadata_key: the id field} +# Regression test: TRY_CAST should preserve source field metadata +query DT +SELECT + TRY_CAST(ts AS DATE) as try_casted, + arrow_metadata(TRY_CAST(ts AS DATE), 'metadata_key') +FROM table_with_metadata; +---- +2020-09-08 ts non-nullable field +2020-09-08 ts non-nullable field +2020-09-08 ts non-nullable field + +# Regression test: TRY_CAST preserves metadata on integer column +query IT +SELECT + TRY_CAST(id AS BIGINT) as try_casted, + arrow_metadata(TRY_CAST(id AS BIGINT), 'metadata_key') +FROM table_with_metadata; +---- +1 the id field +NULL the id field +3 the id field + statement ok drop table table_with_metadata; From 3e283d6c595342c8a8789ee8153e86f30ef08d4b Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 18:21:01 -0500 Subject: [PATCH 2/5] test: add arrow_metadata tests for CAST metadata preservation Co-Authored-By: Claude Opus 4.6 (1M context) --- .../sqllogictest/test_files/metadata.slt | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index 109c9e4694b72..58cc3fbeeff5f 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -220,6 +220,28 @@ FROM table_with_metadata; 2020-09-08 2020-09-08 +# Regression test: CAST should preserve source field metadata +query DT +SELECT + CAST(ts AS DATE) as casted, + arrow_metadata(CAST(ts AS DATE), 'metadata_key') +FROM table_with_metadata; +---- +2020-09-08 ts non-nullable field +2020-09-08 ts non-nullable field +2020-09-08 ts non-nullable field + +# Regression test: CAST preserves metadata on integer column +query IT +SELECT + CAST(id AS BIGINT) as casted, + arrow_metadata(CAST(id AS BIGINT), 'metadata_key') +FROM table_with_metadata; +---- +1 the id field +NULL the id field +3 the id field + # Regression test: distinct with cast query D SELECT DISTINCT (ts::DATE) AS dist From eeaafbe743a24f64a745cd020ee305dbced024c6 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 5 Apr 2026 18:22:04 -0500 Subject: [PATCH 3/5] test: add TRY_CAST metadata test for failed cast (NULL result) Verifies that metadata is preserved even when TRY_CAST returns NULL due to an invalid conversion (e.g. non-numeric string to INT). Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/sqllogictest/test_files/metadata.slt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index 58cc3fbeeff5f..ba2f37e40d89a 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -391,5 +391,16 @@ FROM table_with_metadata; NULL the id field 3 the id field +# Regression test: TRY_CAST preserves metadata even when cast fails (returns NULL) +query IT +SELECT + TRY_CAST(name AS INT) as try_casted, + arrow_metadata(TRY_CAST(name AS INT), 'metadata_key') +FROM table_with_metadata; +---- +NULL the name field +NULL the name field +NULL the name field + statement ok drop table table_with_metadata; From 47547455cca44449c65cf044efc6318cc4975404 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 6 Apr 2026 01:21:35 -0500 Subject: [PATCH 4/5] Apply suggestion from @adriangb --- datafusion/expr/src/expr_schema.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 85f5eda628c05..250698667a5f5 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -582,7 +582,6 @@ impl ExprSchemable for Expr { .clone() .with_data_type(field.data_type().clone()) .with_nullable(true) - .with_metadata(destination_field.metadata().clone()) }) .map(Arc::new), Expr::Like(_) From edd20273e003d399e58a269aece14f55a09b3ea0 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 7 Apr 2026 07:20:09 -0500 Subject: [PATCH 5/5] refactor: extract shared cast_output_field helper for Cast/TryCast Address review feedback by extracting duplicated to_field() logic for Cast and TryCast into a shared helper. Also adds single-argument arrow_metadata tests for both CAST and TRY_CAST. Co-Authored-By: Claude Opus 4.6 (1M context) --- datafusion/expr/src/expr_schema.rs | 51 ++++++++++--------- .../sqllogictest/test_files/metadata.slt | 12 +++++ 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 250698667a5f5..b613db4b6ad1d 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -65,6 +65,24 @@ pub trait ExprSchemable { -> Result<(DataType, bool)>; } +/// Derives the output field for a cast expression from the source field. +/// For `TryCast`, `force_nullable` is `true` since a failed cast returns NULL. +fn cast_output_field( + source_field: &FieldRef, + target_type: &DataType, + force_nullable: bool, +) -> Arc { + let mut f = source_field + .as_ref() + .clone() + .with_data_type(target_type.clone()) + .with_metadata(source_field.metadata().clone()); + if force_nullable { + f = f.with_nullable(true); + } + Arc::new(f) +} + impl ExprSchemable for Expr { /// Returns the [arrow::datatypes::DataType] of the expression /// based on [ExprSchema] @@ -553,37 +571,20 @@ impl ExprSchemable for Expr { func.return_field_from_args(args) } // _ => Ok((self.get_type(schema)?, self.nullable(schema)?)), - Expr::Cast(Cast { expr, field }) => expr - .to_field(schema) - .map(|(_table_ref, destination_field)| { - // This propagates the nullability of the input rather than - // force the nullability of the destination field. This is - // usually the desired behaviour (i.e., specifying a cast - // destination type usually does not force a user to pick - // nullability, and assuming `true` would prevent the non-nullability - // of the parent expression to make the result eligible for - // optimizations that only apply to non-nullable values). - destination_field - .as_ref() - .clone() - .with_data_type(field.data_type().clone()) - .with_metadata(destination_field.metadata().clone()) + Expr::Cast(Cast { expr, field }) => { + expr.to_field(schema).map(|(_table_ref, src)| { + cast_output_field(&src, field.data_type(), false) }) - .map(Arc::new), + } Expr::Placeholder(Placeholder { id: _, field: Some(field), }) => Ok(Arc::clone(field).renamed(&schema_name)), - Expr::TryCast(TryCast { expr, field }) => expr - .to_field(schema) - .map(|(_table_ref, destination_field)| { - destination_field - .as_ref() - .clone() - .with_data_type(field.data_type().clone()) - .with_nullable(true) + Expr::TryCast(TryCast { expr, field }) => { + expr.to_field(schema).map(|(_table_ref, src)| { + cast_output_field(&src, field.data_type(), true) }) - .map(Arc::new), + } Expr::Like(_) | Expr::SimilarTo(_) | Expr::Not(_) diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index ba2f37e40d89a..11ed4fc632e2a 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -242,6 +242,12 @@ FROM table_with_metadata; NULL the id field 3 the id field +# Regression test: CAST with single-argument arrow_metadata (returns full map) +query ? +select arrow_metadata(CAST(id AS BIGINT)) from table_with_metadata limit 1; +---- +{metadata_key: the id field} + # Regression test: distinct with cast query D SELECT DISTINCT (ts::DATE) AS dist @@ -402,5 +408,11 @@ NULL the name field NULL the name field NULL the name field +# Regression test: TRY_CAST with single-argument arrow_metadata (returns full map) +query ? +select arrow_metadata(TRY_CAST(id AS BIGINT)) from table_with_metadata limit 1; +---- +{metadata_key: the id field} + statement ok drop table table_with_metadata;