From 4e2239ed9a132ac25149d78eaf85179758f54255 Mon Sep 17 00:00:00 2001
From: Raushan Prabhakar
Date: Fri, 16 Jan 2026 03:31:25 +0530
Subject: [PATCH 1/5] Fix array_contains null handling to match Spark's
 three-valued logic

---
 .../scala/org/apache/comet/serde/arrays.scala | 104 +++++++++++++++++-
 .../comet/CometArrayExpressionSuite.scala     |  37 +++++++
 2 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
index b552a071d6..a659f183fe 100644
--- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
@@ -132,9 +132,108 @@ object CometArrayContains extends CometExpressionSerde[ArrayContains] {
     val arrayExprProto = exprToProto(expr.children.head, inputs, binding)
     val keyExprProto = exprToProto(expr.children(1), inputs, binding)
 
-    val arrayContainsScalarExpr =
+    // Check if array is null - if so, return null
+    val isArrayNotNullExpr = createUnaryExpr(
+      expr,
+      expr.children.head,
+      inputs,
+      binding,
+      (builder, unaryExpr) => builder.setIsNotNull(unaryExpr))
+
+    // Check if search value is null - if so, return null
+    val isKeyNotNullExpr = createUnaryExpr(
+      expr,
+      expr.children(1),
+      inputs,
+      binding,
+      (builder, unaryExpr) => builder.setIsNotNull(unaryExpr))
+
+    // Check if value exists in array
+    val arrayHasValueExpr =
       scalarFunctionExprToProto("array_has", arrayExprProto, keyExprProto)
-    optExprWithInfo(arrayContainsScalarExpr, expr, expr.children: _*)
+
+    // Check if array contains null elements (for three-valued logic)
+    val nullKeyLiteralProto = exprToProto(Literal(null, expr.children(1).dataType), Seq.empty)
+    val arrayHasNullExpr =
+      scalarFunctionExprToProto("array_has", arrayExprProto, nullKeyLiteralProto)
+
+    // Build the three-valued logic:
+    // 1. If array is null -> return null
+    // 2. If key is null -> return null
+    // 3. If array_has(array, key) is true -> return true
+    // 4. If array_has(array, key) is false AND array_has(array, null) is true
+    //    -> return null (indeterminate)
+    // 5. If array_has(array, key) is false AND array_has(array, null) is false
+    //    -> return false
+    if (isArrayNotNullExpr.isDefined && isKeyNotNullExpr.isDefined &&
+      arrayHasValueExpr.isDefined && arrayHasNullExpr.isDefined &&
+      nullKeyLiteralProto.isDefined) {
+      // Create boolean literals
+      val trueLiteralProto = exprToProto(Literal(true, BooleanType), Seq.empty)
+      val falseLiteralProto = exprToProto(Literal(false, BooleanType), Seq.empty)
+      val nullBooleanLiteralProto = exprToProto(Literal(null, BooleanType), Seq.empty)
+
+      if (trueLiteralProto.isDefined && falseLiteralProto.isDefined &&
+        nullBooleanLiteralProto.isDefined) {
+        // If array_has(array, key) is false, check if array has nulls
+        // If array_has(array, null) is true -> return null, else return false
+        val whenNotFoundCheckNulls = ExprOuterClass.CaseWhen
+          .newBuilder()
+          .addWhen(arrayHasNullExpr.get) // if array has nulls
+          .addThen(nullBooleanLiteralProto.get) // return null (indeterminate)
+          .setElseExpr(falseLiteralProto.get) // else return false
+          .build()
+
+        // If array_has(array, key) is true, return true, else check null case
+        val whenValueFound = ExprOuterClass.CaseWhen
+          .newBuilder()
+          .addWhen(arrayHasValueExpr.get) // if value found
+          .addThen(trueLiteralProto.get) // return true
+          .setElseExpr(
+            ExprOuterClass.Expr
+              .newBuilder()
+              .setCaseWhen(whenNotFoundCheckNulls)
+              .build()
+          ) // else check null case
+          .build()
+
+        // Check if key is null -> return null, else use the logic above
+        val whenKeyNotNull = ExprOuterClass.CaseWhen
+          .newBuilder()
+          .addWhen(isKeyNotNullExpr.get) // if key is not null
+          .addThen(
+            ExprOuterClass.Expr
+              .newBuilder()
+              .setCaseWhen(whenValueFound)
+              .build())
+          .setElseExpr(nullBooleanLiteralProto.get) // key is null -> return null
+          .build()
+
+        // Outer case: if array is null, return null, else use the logic above
+        val outerCaseWhen = ExprOuterClass.CaseWhen
+          .newBuilder()
+          .addWhen(isArrayNotNullExpr.get) // if array is not null
+          .addThen(
+            ExprOuterClass.Expr
+              .newBuilder()
+              .setCaseWhen(whenKeyNotNull)
+              .build())
+          .setElseExpr(nullBooleanLiteralProto.get) // array is null -> return null
+          .build()
+
+        Some(
+          ExprOuterClass.Expr
+            .newBuilder()
+            .setCaseWhen(outerCaseWhen)
+            .build())
+      } else {
+        withInfo(expr, expr.children: _*)
+        None
+      }
+    } else {
+      withInfo(expr, expr.children: _*)
+      None
+    }
   }
 }
@@ -614,3 +713,4 @@ trait ArraysBase {
     }
   }
 }
+
diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
index cf49117364..24809690f9 100644
--- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
@@ -325,6 +325,43 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
     }
   }
 
+  test("array_contains - three-valued null logic") {
+    // Test Spark's three-valued logic for array_contains:
+    // 1. Returns true if value is found
+    // 2. Returns false if no match found AND no null elements exist
+    // 3. Returns null if no match found BUT null elements exist (indeterminate)
+    // 4. Returns null if search value is null
+    withTempDir { dir =>
+      withTempView("t1") {
+        val path = new Path(dir.toURI.toString, "test.parquet")
+        makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = false, n = 100)
+        spark.read.parquet(path.toString).createOrReplaceTempView("t1")
+
+        // Test case 1: value found -> returns true
+        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, 2, 3), 2) FROM t1"))
+
+        // Test case 2: no match, no nulls -> returns false
+        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, 2, 3), 5) FROM t1"))
+
+        // Test case 3: no match, but null exists -> returns null (indeterminate)
+        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 2) FROM t1"))
+
+        // Test case 4: match found even with nulls -> returns true
+        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 1) FROM t1"))
+
+        // Test case 5: search value is null -> returns null
+        checkSparkAnswerAndOperator(
+          sql("SELECT array_contains(array(1, 2, 3), cast(null as int)) FROM t1"))
+
+        // Test case 6: array with nulls, searching for existing value -> returns true
+        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 3) FROM t1"))
+
+        // Test case 7: empty array -> returns false
+        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(), 1) FROM t1"))
+      }
+    }
+  }
+
 test("array_contains - test all types (convert from Parquet)") {
   withTempDir { dir =>
     val path = new Path(dir.toURI.toString, "test.parquet")

From cefddad5087772b8a783494b347451489d15f5ad Mon Sep 17 00:00:00 2001
From: Raushan Prabhakar
Date: Sat, 17 Jan 2026 13:26:24 +0530
Subject: [PATCH 2/5] Update the array_contains - three-valued null logic test
 to use column references and conditional logic instead of literal arrays.
 This ensures the test exercises the native Comet execution path rather than
 being optimized away by Spark's constant folding optimizer.

---
 .../scala/org/apache/comet/serde/arrays.scala |  1 -
 .../comet/CometArrayExpressionSuite.scala     | 25 +++++++++++++------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
index a659f183fe..0d8013ffb6 100644
--- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
@@ -713,4 +713,3 @@ trait ArraysBase {
     }
   }
 }
-
diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
index 24809690f9..e2fe83eba1 100644
--- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
@@ -338,26 +338,35 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
         spark.read.parquet(path.toString).createOrReplaceTempView("t1")
 
         // Test case 1: value found -> returns true
-        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, 2, 3), 2) FROM t1"))
+        // Use column references to avoid constant folding
+        checkSparkAnswerAndOperator(sql(
+          "SELECT array_contains(array(_2, _3, _4), _3) FROM t1 WHERE _2 = 1 AND _3 = 2 AND _4 = 3"))
 
         // Test case 2: no match, no nulls -> returns false
-        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, 2, 3), 5) FROM t1"))
+        checkSparkAnswerAndOperator(sql(
+          "SELECT array_contains(array(_2, _3, _4), 999) FROM t1 WHERE _2 = 1 AND _3 = 2 AND _4 = 3 AND _2 IS NOT NULL AND _3 IS NOT NULL AND _4 IS NOT NULL"))
 
         // Test case 3: no match, but null exists -> returns null (indeterminate)
-        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 2) FROM t1"))
+        // Use CASE to create array with null to avoid constant folding
+        checkSparkAnswerAndOperator(sql(
+          "SELECT array_contains(CASE WHEN _2 >= 0 THEN array(_2, cast(null as int), _4) ELSE array(_2, cast(null as int), _4) END, 999) FROM t1 WHERE _2 = 1 AND _4 = 3 AND _2 IS NOT NULL AND _4 IS NOT NULL"))
 
         // Test case 4: match found even with nulls -> returns true
-        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 1) FROM t1"))
+        checkSparkAnswerAndOperator(sql(
+          "SELECT array_contains(CASE WHEN _2 >= 0 THEN array(_2, cast(null as int), _4) ELSE array(_2, cast(null as int), _4) END, _2) FROM t1 WHERE _2 = 1 AND _4 = 3 AND _2 IS NOT NULL AND _4 IS NOT NULL"))
 
         // Test case 5: search value is null -> returns null
-        checkSparkAnswerAndOperator(
-          sql("SELECT array_contains(array(1, 2, 3), cast(null as int)) FROM t1"))
+        checkSparkAnswerAndOperator(sql(
+          "SELECT array_contains(array(_2, _3, _4), cast(null as int)) FROM t1 WHERE _2 = 1 AND _3 = 2 AND _4 = 3"))
 
         // Test case 6: array with nulls, searching for existing value -> returns true
-        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 3) FROM t1"))
+        checkSparkAnswerAndOperator(sql(
+          "SELECT array_contains(CASE WHEN _2 >= 0 THEN array(_2, cast(null as int), _4) ELSE array(_2, cast(null as int), _4) END, _4) FROM t1 WHERE _2 = 1 AND _4 = 3 AND _2 IS NOT NULL AND _4 IS NOT NULL"))
 
         // Test case 7: empty array -> returns false
-        checkSparkAnswerAndOperator(sql("SELECT array_contains(array(), 1) FROM t1"))
+        // Use conditional logic to create empty array to avoid constant folding
+        checkSparkAnswerAndOperator(sql(
+          "SELECT array_contains(CASE WHEN _2 < 0 THEN array(_2) ELSE array() END, 1) FROM t1 WHERE _2 >= 0"))
0")) } } } From 97419fc8812db9d76f3848e8764dd5ed10d423dd Mon Sep 17 00:00:00 2001 From: Raushan Prabhakar Date: Sat, 17 Jan 2026 19:28:49 +0530 Subject: [PATCH 3/5] Improving test case to ensure the test exercises the native Comet execution path rather than being optimized away by Spark's constant folding optimizer. --- .../comet/CometArrayExpressionSuite.scala | 97 +++++++++++-------- 1 file changed, 55 insertions(+), 42 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala index e2fe83eba1..cfdb074d93 100644 --- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala @@ -54,6 +54,36 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp } } + test("array_remove - remove null elements") { + // Test that array_remove(arr, null) removes all null elements from the array + // This is the fix for https://github.com/apache/datafusion-comet/issues/3173 + Seq(true, false).foreach { dictionaryEnabled => + withTempView("t1") { + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled, 100) + spark.read.parquet(path.toString).createOrReplaceTempView("t1") + // Test with array containing nulls and removing null + checkSparkAnswerAndOperator( + sql("SELECT array_remove(array(_2, null, _3, null, _4), null) from t1")) + // Disable constant folding for literal tests to ensure Comet implementation is exercised + withSQLConf( + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> + "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") { + // Test with literal array: array_remove(array(1, null, 2, null, 3), null) should return [1, 2, 3] + checkSparkAnswerAndOperator( + sql("SELECT array_remove(array(1, null, 2, null, 3), null) from t1")) + // Test with all nulls - should return empty array + checkSparkAnswerAndOperator( + sql("SELECT array_remove(array(null, null, null), null) from t1")) + // Test with no nulls - should return original array + checkSparkAnswerAndOperator(sql("SELECT array_remove(array(1, 2, 3), null) from t1")) + } + } + } + } + } + test("array_remove - test all types (native Parquet reader)") { withTempDir { dir => withTempView("t1") { @@ -324,49 +354,32 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp } } } - - test("array_contains - three-valued null logic") { - // Test Spark's three-valued logic for array_contains: - // 1. Returns true if value is found - // 2. Returns false if no match found AND no null elements exist - // 3. Returns null if no match found BUT null elements exist (indeterminate) - // 4. 
Returns null if search value is null - withTempDir { dir => + test("array_remove - remove null elements") { + // Test that array_remove(arr, null) removes all null elements from the array + // This is the fix for https://github.com/apache/datafusion-comet/issues/3173 + Seq(true, false).foreach { dictionaryEnabled => withTempView("t1") { - val path = new Path(dir.toURI.toString, "test.parquet") - makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = false, n = 100) - spark.read.parquet(path.toString).createOrReplaceTempView("t1") - - // Test case 1: value found -> returns true - // Use column references to avoid constant folding - checkSparkAnswerAndOperator(sql( - "SELECT array_contains(array(_2, _3, _4), _3) FROM t1 WHERE _2 = 1 AND _3 = 2 AND _4 = 3")) - - // Test case 2: no match, no nulls -> returns false - checkSparkAnswerAndOperator(sql( - "SELECT array_contains(array(_2, _3, _4), 999) FROM t1 WHERE _2 = 1 AND _3 = 2 AND _4 = 3 AND _2 IS NOT NULL AND _3 IS NOT NULL AND _4 IS NOT NULL")) - - // Test case 3: no match, but null exists -> returns null (indeterminate) - // Use CASE to create array with null to avoid constant folding - checkSparkAnswerAndOperator(sql( - "SELECT array_contains(CASE WHEN _2 >= 0 THEN array(_2, cast(null as int), _4) ELSE array(_2, cast(null as int), _4) END, 999) FROM t1 WHERE _2 = 1 AND _4 = 3 AND _2 IS NOT NULL AND _4 IS NOT NULL")) - - // Test case 4: match found even with nulls -> returns true - checkSparkAnswerAndOperator(sql( - "SELECT array_contains(CASE WHEN _2 >= 0 THEN array(_2, cast(null as int), _4) ELSE array(_2, cast(null as int), _4) END, _2) FROM t1 WHERE _2 = 1 AND _4 = 3 AND _2 IS NOT NULL AND _4 IS NOT NULL")) - - // Test case 5: search value is null -> returns null - checkSparkAnswerAndOperator(sql( - "SELECT array_contains(array(_2, _3, _4), cast(null as int)) FROM t1 WHERE _2 = 1 AND _3 = 2 AND _4 = 3")) - - // Test case 6: array with nulls, searching for existing value -> returns true - checkSparkAnswerAndOperator(sql( - "SELECT array_contains(CASE WHEN _2 >= 0 THEN array(_2, cast(null as int), _4) ELSE array(_2, cast(null as int), _4) END, _4) FROM t1 WHERE _2 = 1 AND _4 = 3 AND _2 IS NOT NULL AND _4 IS NOT NULL")) - - // Test case 7: empty array -> returns false - // Use conditional logic to create empty array to avoid constant folding - checkSparkAnswerAndOperator(sql( - "SELECT array_contains(CASE WHEN _2 < 0 THEN array(_2) ELSE array() END, 1) FROM t1 WHERE _2 >= 0")) + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled, 100) + spark.read.parquet(path.toString).createOrReplaceTempView("t1") + // Test with array containing nulls and removing null + checkSparkAnswerAndOperator( + sql("SELECT array_remove(array(_2, null, _3, null, _4), null) from t1")) + // Disable constant folding for literal tests to ensure Comet implementation is exercised + withSQLConf( + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> + "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") { + // Test with literal array: array_remove(array(1, null, 2, null, 3), null) should return [1, 2, 3] + checkSparkAnswerAndOperator( + sql("SELECT array_remove(array(1, null, 2, null, 3), null) from t1")) + // Test with all nulls - should return empty array + checkSparkAnswerAndOperator( + sql("SELECT array_remove(array(null, null, null), null) from t1")) + // Test with no nulls - should return original array + checkSparkAnswerAndOperator(sql("SELECT array_remove(array(1, 2, 3), null) 
from t1")) + } + } } } } From a370ba686cb000b1cb690f0d0f17fbc19efcf8fd Mon Sep 17 00:00:00 2001 From: Raushan Prabhakar Date: Sat, 17 Jan 2026 19:34:01 +0530 Subject: [PATCH 4/5] removing duplicate test case implementation --- .../comet/CometArrayExpressionSuite.scala | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala index cfdb074d93..a60c1c0b4c 100644 --- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala @@ -354,35 +354,6 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp } } } - test("array_remove - remove null elements") { - // Test that array_remove(arr, null) removes all null elements from the array - // This is the fix for https://github.com/apache/datafusion-comet/issues/3173 - Seq(true, false).foreach { dictionaryEnabled => - withTempView("t1") { - withTempDir { dir => - val path = new Path(dir.toURI.toString, "test.parquet") - makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled, 100) - spark.read.parquet(path.toString).createOrReplaceTempView("t1") - // Test with array containing nulls and removing null - checkSparkAnswerAndOperator( - sql("SELECT array_remove(array(_2, null, _3, null, _4), null) from t1")) - // Disable constant folding for literal tests to ensure Comet implementation is exercised - withSQLConf( - SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> - "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") { - // Test with literal array: array_remove(array(1, null, 2, null, 3), null) should return [1, 2, 3] - checkSparkAnswerAndOperator( - sql("SELECT array_remove(array(1, null, 2, null, 3), null) from t1")) - // Test with all nulls - should return empty array - checkSparkAnswerAndOperator( - sql("SELECT array_remove(array(null, null, null), null) from t1")) - // Test with no nulls - should return original array - checkSparkAnswerAndOperator(sql("SELECT array_remove(array(1, 2, 3), null) from t1")) - } - } - } - } - } test("array_contains - test all types (convert from Parquet)") { withTempDir { dir => From 50d7dfbffa063015656c31fcdbf49b4089efdadf Mon Sep 17 00:00:00 2001 From: Raushan Prabhakar Date: Mon, 19 Jan 2026 01:49:54 +0530 Subject: [PATCH 5/5] Improving test case implementation --- .../comet/CometArrayExpressionSuite.scala | 72 +++++++++++-------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala index a60c1c0b4c..de1866a322 100644 --- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala @@ -54,36 +54,6 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp } } - test("array_remove - remove null elements") { - // Test that array_remove(arr, null) removes all null elements from the array - // This is the fix for https://github.com/apache/datafusion-comet/issues/3173 - Seq(true, false).foreach { dictionaryEnabled => - withTempView("t1") { - withTempDir { dir => - val path = new Path(dir.toURI.toString, "test.parquet") - makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled, 100) - spark.read.parquet(path.toString).createOrReplaceTempView("t1") - // Test with array containing nulls and removing 
null - checkSparkAnswerAndOperator( - sql("SELECT array_remove(array(_2, null, _3, null, _4), null) from t1")) - // Disable constant folding for literal tests to ensure Comet implementation is exercised - withSQLConf( - SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> - "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") { - // Test with literal array: array_remove(array(1, null, 2, null, 3), null) should return [1, 2, 3] - checkSparkAnswerAndOperator( - sql("SELECT array_remove(array(1, null, 2, null, 3), null) from t1")) - // Test with all nulls - should return empty array - checkSparkAnswerAndOperator( - sql("SELECT array_remove(array(null, null, null), null) from t1")) - // Test with no nulls - should return original array - checkSparkAnswerAndOperator(sql("SELECT array_remove(array(1, 2, 3), null) from t1")) - } - } - } - } - } - test("array_remove - test all types (native Parquet reader)") { withTempDir { dir => withTempView("t1") { @@ -355,6 +325,48 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp } } + test("array_contains - three-valued null logic") { + // Test Spark's three-valued logic for array_contains: + // 1. Returns true if value is found + // 2. Returns false if no match found AND no null elements exist + // 3. Returns null if no match found BUT null elements exist (indeterminate) + // 4. Returns null if search value is null + withTempDir { dir => + withTempView("t1") { + val path = new Path(dir.toURI.toString, "test.parquet") + makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = false, n = 100) + spark.read.parquet(path.toString).createOrReplaceTempView("t1") + + // Disable constant folding to ensure Comet implementation is exercised + withSQLConf( + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> + "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") { + // Test case 1: value found -> returns true + checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, 2, 3), 2) FROM t1")) + + // Test case 2: no match, no nulls -> returns false + checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, 2, 3), 5) FROM t1")) + + // Test case 3: no match, but null exists -> returns null (indeterminate) + checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 2) FROM t1")) + + // Test case 4: match found even with nulls -> returns true + checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 1) FROM t1")) + + // Test case 5: search value is null -> returns null + checkSparkAnswerAndOperator( + sql("SELECT array_contains(array(1, 2, 3), cast(null as int)) FROM t1")) + + // Test case 6: array with nulls, searching for existing value -> returns true + checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 3) FROM t1")) + + // Test case 7: empty array -> returns false + checkSparkAnswerAndOperator(sql("SELECT array_contains(array(), 1) FROM t1")) + } + } + } + } + test("array_contains - test all types (convert from Parquet)") { withTempDir { dir => val path = new Path(dir.toURI.toString, "test.parquet")
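
Reference model (reviewer aid, not part of the patches above): the nested CaseWhen
protos built in arrays.scala encode Spark's three-valued array_contains semantics on
top of DataFusion's two-valued array_has. A minimal runnable Scala sketch of that
truth table, with Option standing in for SQL NULL - the object and method names here
are illustrative only:

object ArrayContainsSemantics extends App {
  // The array itself, its elements, and the search key may each be null, so all
  // three are modeled as Option; the result is a tri-state Option[Boolean].
  def arrayContains[T](array: Option[Seq[Option[T]]], key: Option[T]): Option[Boolean] =
    (array, key) match {
      case (None, _) | (_, None) => None // null array or null key -> null
      case (Some(elems), Some(k)) =>
        if (elems.contains(Some(k))) Some(true) // value found -> true
        else if (elems.contains(None)) None // no match but nulls present -> null (indeterminate)
        else Some(false) // no match, no nulls -> false
    }

  // The seven cases exercised by the test in PATCH 5:
  assert(arrayContains(Some(Seq(Some(1), Some(2), Some(3))), Some(2)) == Some(true))
  assert(arrayContains(Some(Seq(Some(1), Some(2), Some(3))), Some(5)) == Some(false))
  assert(arrayContains(Some(Seq(Some(1), None, Some(3))), Some(2)) == None)
  assert(arrayContains(Some(Seq(Some(1), None, Some(3))), Some(1)) == Some(true))
  assert(arrayContains(Some(Seq(Some(1), Some(2), Some(3))), None) == None)
  assert(arrayContains(Some(Seq(Some(1), None, Some(3))), Some(3)) == Some(true))
  assert(arrayContains(Some(Seq.empty[Option[Int]]), Some(1)) == Some(false))
}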
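
On why the tests exclude ConstantFolding: with the rule active, Spark evaluates
literal-only expressions such as array_contains(array(1, null, 3), 2) at optimization
time, so the plan never reaches Comet's native array_has path. A minimal sketch of the
pattern the suite uses, assuming the CometTestBase helpers (withSQLConf,
checkSparkAnswerAndOperator, sql) that appear in the patches above:

import org.apache.spark.sql.internal.SQLConf

// Exclude ConstantFolding so the literal expression survives optimization and is
// serialized to Comet's native expression instead of being pre-computed by Spark.
withSQLConf(
  SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
    "org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
  checkSparkAnswerAndOperator(sql("SELECT array_contains(array(1, null, 3), 2) FROM t1"))
}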