From 2badfefaa583c5455022903870222732c506ead4 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 24 Dec 2025 15:02:13 +0800 Subject: [PATCH 01/21] eager agg from tpc_preview3 1224 --- .../doris/nereids/jobs/executor/Rewriter.java | 29 +- .../rules/analysis/NormalizeAggregate.java | 51 +-- .../nereids/rules/rewrite/AdjustNullable.java | 16 +- .../eageraggregation/EagerAggRewriter.java | 405 ++++++++++++++++++ .../eageraggregation/PushDownAggContext.java | 130 ++++++ .../eageraggregation/PushDownAggregation.java | 243 +++++++++++ .../PushdownSumIfAggregation.java | 152 +++++++ .../eageraggregation/SumAggContext.java | 56 +++ .../eageraggregation/SumAggWriter.java | 323 ++++++++++++++ .../nereids/stats/ExpressionEstimation.java | 2 +- .../plans/logical/LogicalSetOperation.java | 42 ++ .../trees/plans/logical/LogicalUnion.java | 8 + .../org/apache/doris/qe/SessionVariable.java | 9 + .../shape_check/tpcds_sf1000/shape/query2.out | 24 +- .../tpcds_sf1000/shape/query23.out | 12 +- .../shape_check/tpcds_sf1000/shape/query3.out | 19 +- .../tpcds_sf1000/shape/query31.out | 18 +- .../tpcds_sf1000/shape/query42.out | 9 +- .../tpcds_sf1000/shape/query52.out | 9 +- .../tpcds_sf1000/shape/query55.out | 9 +- .../tpcds_sf1000/shape/query59.out | 9 +- 21 files changed, 1492 insertions(+), 83 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index ed0fa310228a70..356da3576ceb7e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -127,7 +127,6 @@ import org.apache.doris.nereids.rules.rewrite.PullUpProjectUnderLimit; import org.apache.doris.nereids.rules.rewrite.PullUpProjectUnderTopN; import org.apache.doris.nereids.rules.rewrite.PushCountIntoUnionAll; -import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOnPkFk; import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOneSide; import org.apache.doris.nereids.rules.rewrite.PushDownAggWithDistinctThroughJoinOneSide; @@ -171,6 +170,8 @@ import org.apache.doris.nereids.rules.rewrite.batch.ApplyToJoin; import org.apache.doris.nereids.rules.rewrite.batch.CorrelateApplyToUnCorrelateApply; import org.apache.doris.nereids.rules.rewrite.batch.EliminateUselessPlanUnderApply; +import org.apache.doris.nereids.rules.rewrite.eageraggregation.PushDownAggregation; +import org.apache.doris.nereids.rules.rewrite.eageraggregation.PushdownSumIfAggregation; import org.apache.doris.nereids.trees.plans.algebra.SetOperation; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalApply; @@ -649,19 +650,6 @@ public class Rewriter extends AbstractBatchJobExecutor { new MergeAggregate() ) ), - topic("Eager aggregation", - cascadesContext -> cascadesContext.rewritePlanContainsTypes( - LogicalAggregate.class, LogicalJoin.class - ), - costBased(topDown( - new PushDownAggWithDistinctThroughJoinOneSide(), - new 
PushDownAggThroughJoinOneSide(), - new PushDownAggThroughJoin() - )), - costBased(custom(RuleType.PUSH_DOWN_DISTINCT_THROUGH_JOIN, PushDownDistinctThroughJoin::new)), - topDown(new PushCountIntoUnionAll()) - ), - // this rule should invoke after infer predicate and push down distinct, and before push down limit topic("eliminate join according unique or foreign key", cascadesContext -> cascadesContext.rewritePlanContainsTypes(LogicalJoin.class), @@ -678,7 +666,20 @@ public class Rewriter extends AbstractBatchJobExecutor { topDown(new PushDownAggThroughJoinOnPkFk()), topDown(new PullUpJoinFromUnionAll()) ), + topic("Eager aggregation", + cascadesContext -> cascadesContext.rewritePlanContainsTypes( + LogicalAggregate.class, LogicalJoin.class + ), + costBased(topDown( + new PushDownAggWithDistinctThroughJoinOneSide(), + new PushDownAggThroughJoinOneSide() + )), + costBased(custom(RuleType.PUSH_DOWN_DISTINCT_THROUGH_JOIN, PushDownDistinctThroughJoin::new)), + custom(RuleType.PUSH_DOWN_AGG_THROUGH_JOIN, PushdownSumIfAggregation::new), + custom(RuleType.PUSH_DOWN_AGG_THROUGH_JOIN, PushDownAggregation::new), + topDown(new PushCountIntoUnionAll()) + ), topic("Limit optimization", cascadesContext -> cascadesContext.rewritePlanContainsTypes(LogicalLimit.class) || cascadesContext.rewritePlanContainsTypes(LogicalTopN.class) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java index bdf4c3d80b6e36..0e24777bb17b5f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java @@ -129,33 +129,34 @@ public List buildRules() { .toRule(RuleType.NORMALIZE_AGGREGATE)); } + /** + * The LogicalAggregate node may contain window agg functions and usual agg functions + * we call window agg functions as window-agg and 
usual agg functions as trivial-agg for short + * This rule simplifies the LogicalAggregate node by: + * 1. Push down some exprs from old LogicalAggregate node to a new child LogicalProject Node, + * 2. create a new LogicalAggregate with normalized group by exprs and trivial-aggs + * 3. Pull up normalized old LogicalAggregate's output exprs to a new parent LogicalProject Node + * Push down exprs: + * 1. all group by exprs + * 2. child contains subquery expr in trivial-agg + * 3. child contains window expr in trivial-agg + * 4. all input slots of trivial-agg + * 5. expr(including subquery) in distinct trivial-agg + * Normalize LogicalAggregate's output. + * 1. normalize group by exprs by outputs of bottom LogicalProject + * 2. normalize trivial-aggs by outputs of bottom LogicalProject + * 3. build normalized agg outputs + * Pull up exprs: + * normalize all output exprs in old LogicalAggregate to build a parent project node, typically includes: + * 1. simple slots + * 2. aliases + * a. alias with no aggs child + * b. alias with trivial-agg child + * c. alias with window-agg + */ @SuppressWarnings("checkstyle:UnusedLocalVariable") - private LogicalPlan normalizeAgg(LogicalAggregate aggregate, Optional> having, + public LogicalPlan normalizeAgg(LogicalAggregate aggregate, Optional> having, CascadesContext ctx) { - // The LogicalAggregate node may contain window agg functions and usual agg functions - // we call window agg functions as window-agg and usual agg functions as trivial-agg for short - // This rule simplify LogicalAggregate node by: - // 1. Push down some exprs from old LogicalAggregate node to a new child LogicalProject Node, - // 2. create a new LogicalAggregate with normalized group by exprs and trivial-aggs - // 3. Pull up normalized old LogicalAggregate's output exprs to a new parent LogicalProject Node - // Push down exprs: - // 1. all group by exprs - // 2. child contains subquery expr in trivial-agg - // 3. child contains window expr in trivial-agg - // 4. 
all input slots of trivial-agg - // 5. expr(including subquery) in distinct trivial-agg - // Normalize LogicalAggregate's output. - // 1. normalize group by exprs by outputs of bottom LogicalProject - // 2. normalize trivial-aggs by outputs of bottom LogicalProject - // 3. build normalized agg outputs - // Pull up exprs: - // normalize all output exprs in old LogicalAggregate to build a parent project node, typically includes: - // 1. simple slots - // 2. aliases - // a. alias with no aggs child - // b. alias with trivial-agg child - // c. alias with window-agg - // Push down exprs: // collect group by exprs Set groupingByExprs = Utils.fastToImmutableSet(aggregate.getGroupByExpressions()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java index 204a018fbc7cb5..14973c986f888c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java @@ -80,8 +80,20 @@ public class AdjustNullable extends DefaultPlanRewriter> imple private final boolean isAnalyzedPhase; - public AdjustNullable(boolean isAnalyzedPhase) { + /** + * When check is true, if we find a slot that is non-nullable in the plan, + * but we infer it should be nullable from the plan's subtree, and fe_debug is true, + * then throw an exception. 
+ */ + private final boolean check; + + public AdjustNullable(boolean isAnalyzedPhase, boolean check) { this.isAnalyzedPhase = isAnalyzedPhase; + this.check = check; + } + + public AdjustNullable(boolean isAnalyzedPhase) { + this(isAnalyzedPhase, !isAnalyzedPhase); } @Override @@ -496,7 +508,7 @@ private Optional updateExpression(Optional input, private Optional updateExpression(T input, Map replaceMap, boolean debugCheck) { AtomicBoolean changed = new AtomicBoolean(false); - Expression replaced = doUpdateExpression(changed, input, replaceMap, !isAnalyzedPhase && debugCheck); + Expression replaced = doUpdateExpression(changed, input, replaceMap, check && debugCheck); return changed.get() ? Optional.of((T) replaced) : Optional.empty(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java new file mode 100644 index 00000000000000..f7ed45777f4e6c --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -0,0 +1,405 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.rewrite.eageraggregation; + +import org.apache.doris.nereids.rules.analysis.NormalizeAggregate; +import org.apache.doris.nereids.rules.rewrite.StatsDerive; +import org.apache.doris.nereids.stats.ExpressionEstimation; +// import org.apache.doris.nereids.stats.StatsCalculator; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.SessionVariable; +import org.apache.doris.statistics.ColumnStatistic; +import org.apache.doris.statistics.Statistics; + +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * eager aggregation: pushes an aggregation below a join, grouping by the original keys plus the join keys + * agg[sum(t1.A) group by t1.B] + * ->join(t1.C=t2.D) + * ->T1(A, B, C) + * ->T2(D) + * + * => + * agg[sum(x) group by t1.B] + * ->join(t1.C=t2.D) + * ->agg[sum(A) as x, group by B, C] + * ->T1(A, B, C) + * ->T2(D) + */ +public 
class EagerAggRewriter extends DefaultPlanRewriter { + // row_count / ndv ratios used to judge how strongly grouping by a key shrinks its input + private static final double LOWER_AGGREGATE_EFFECT_COEFFICIENT = 10000; + private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000; + private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100; + // derives statistics for plans that do not carry them yet + private final StatsDerive derive = new StatsDerive(true); + + /** + * Chooses the join child that can evaluate the pushed-down aggregation and recurses into it. + * + * Every aggregate function of the context must read its input slots from the same child; if the functions + * are split between both children, or one of them is not fully covered by either child, the join is + * returned unchanged. The child context groups by the parent group keys that the chosen child can compute + * plus the slots of that child used in the hash and other join conjuncts. When the right child is estimated + * to be larger than PushDownAggContext.BIG_JOIN_BUILD_SIZE rows the child context is marked as having + * passed a big join. + * + * @return the join with the rewritten child, or the original join when nothing was pushed down + */ + @Override + public Plan visitLogicalJoin(LogicalJoin join, PushDownAggContext context) { + boolean toLeft = true; + boolean toRight = true; + for (AggregateFunction aggFunc : context.getAggFunctions()) { + if (join.left().getOutputSet().containsAll(aggFunc.getInputSlots())) { + toRight = false; + } else if (join.right().getOutputSet().containsAll(aggFunc.getInputSlots())) { + toLeft = false; + } else { + // the inputs are not fully produced by either child, so this function cannot be evaluated + // below the join; without this branch it slipped through once another function had picked a side + return join; + } + if (toLeft == toRight) { + return join; + } + } + + List joinConditionSlots; + List childGroupByKeys = new ArrayList<>(); + if (toLeft) { + joinConditionSlots = getJoinConditionsInputSlotsFromOneSide(join, join.left()); + for (NamedExpression key : context.getGroupKeys()) { + if (join.left().getOutputSet().containsAll(key.getInputSlots())) { + childGroupByKeys.add(key); + } + } + } else { + joinConditionSlots = getJoinConditionsInputSlotsFromOneSide(join, join.right()); + for (NamedExpression key : context.getGroupKeys()) { + if (join.right().getOutputSet().containsAll(key.getInputSlots())) { + childGroupByKeys.add(key); + } + } + } + + // the join keys of the chosen child must survive the pushed-down aggregation + for (SlotReference slot : joinConditionSlots) { + if (!childGroupByKeys.contains(slot)) { + childGroupByKeys.add(slot); + } + } + + PushDownAggContext childContext = context.withGroupKeys(childGroupByKeys); + Statistics stats = join.right().getStats(); + if (stats == null) { + stats = join.right().accept(derive, new StatsDerive.DeriveContext()); + } + if (stats.getRowCount() > PushDownAggContext.BIG_JOIN_BUILD_SIZE) { + childContext = childContext.passThroughBigJoin(); + } + if 
(toLeft) { + Plan newLeft = join.left().accept(this, childContext); + if (newLeft != join.left()) { + context.getFinalGroupKeys().addAll(childContext.getFinalGroupKeys()); + return join.withChildren(newLeft, join.right()); + } + } else { + Plan newRight = join.right().accept(this, childContext); + if (newRight != join.right()) { + context.getFinalGroupKeys().addAll(childContext.getFinalGroupKeys()); + return join.withChildren(join.left(), newRight); + } + } + return join; + } + + /** + * Collects the input slots of the hash and other join conjuncts that are output by the given side. + * A slot is added once per occurrence, so the result may contain duplicates. + */ + private List getJoinConditionsInputSlotsFromOneSide(LogicalJoin join, + Plan side) { + List oneSideSlots = new ArrayList<>(); + for (Expression condition : join.getHashJoinConjuncts()) { + for (Slot slot : condition.getInputSlots()) { + if (side.getOutputSet().contains(slot)) { + oneSideSlots.add((SlotReference) slot); + } + } + } + for (Expression condition : join.getOtherJoinConjuncts()) { + for (Slot slot : condition.getInputSlots()) { + if (side.getOutputSet().contains(slot)) { + oneSideSlots.add((SlotReference) slot); + } + } + } + return oneSideSlots; + } + + /** + * Rewrites the context so that it is expressed over the project's child: slots produced by an alias of + * the project are replaced by the alias body in aggregate functions and aliased group keys, and by the + * alias itself when the group key is a plain slot. The alias of each aggregate function is kept and + * re-pointed at the rewritten function. + */ + private PushDownAggContext createContextFromProject(LogicalProject project, + PushDownAggContext context) { + HashMap replaceMapAliasBody = new HashMap<>(); + HashMap replaceMapAlias = new HashMap<>(); + for (NamedExpression ne : project.getProjects()) { + if (ne instanceof Alias) { + replaceMapAliasBody.put(ne.toSlot(), ((Alias) ne).child()); + replaceMapAlias.put(ne.toSlot(), ne); + } + } + + /* + * context: sum(a) groupBy(y+z as x, l) + * proj: b+c as a, u+v as y, m+n as l + * newContext: sum(b+c), groupBy((u+v)+z as x, m+n as l) + */ + + List groupKeys = new ArrayList<>(); + for (NamedExpression key : context.getGroupKeys()) { + NamedExpression newKey; + if (key instanceof Alias) { + newKey = (Alias) ExpressionUtils.replace(key, replaceMapAliasBody); + } else { + // key is slot + newKey = (NamedExpression) replaceMapAlias.getOrDefault(key, key); + } + groupKeys.add(newKey); + } + + List aggFunctions = new ArrayList<>(); + 
Map aliasMap = new HashMap<>(); + for (AggregateFunction aggFunc : context.getAggFunctions()) { + AggregateFunction newAggFunc = (AggregateFunction) ExpressionUtils.replace(aggFunc, replaceMapAliasBody); + Alias alias = context.getAliasMap().get(aggFunc); + aliasMap.put(newAggFunc, (Alias) alias.withChildren(newAggFunc)); + aggFunctions.add(newAggFunc); + } + return new PushDownAggContext(aggFunctions, groupKeys, aliasMap, + context.getCascadesContext(), context.isPassThroughBigJoin()); + } + + /** + * Pushes the aggregation through a project. + * + * A project directly over a catalog relation (optionally with one filter in between) gets the aggregate + * generated on top of it. Otherwise every slot used by the context must be output by the project; the + * context is then rewritten over the project's child and the child is visited. When the child changed, + * the projection list is rebuilt: inputs of the aggregate functions are dropped, final group keys are + * passed through as slots, and the slots of the pushed-down aggregate aliases are appended. + * + * @return the rebuilt project, or the original project when nothing was pushed down + */ + @Override + public Plan visitLogicalProject(LogicalProject project, PushDownAggContext context) { + if (project.child() instanceof LogicalCatalogRelation + || (project.child() instanceof LogicalFilter + && project.child().child(0) instanceof LogicalCatalogRelation)) { + // project + // --> scan + // => + // aggregate + // --> project + // --> scan + return genAggregate(project, context); + } + + // check validation + // all slots in context are projected + List slotsInContext = context.getGroupKeys().stream() + .flatMap(e -> e.getInputSlots().stream()).collect(Collectors.toList()); + slotsInContext.addAll(context.getAggFunctionsInputSlots()); + for (Slot slot : slotsInContext) { + if (!project.getOutputSet().contains(slot)) { + if (SessionVariable.isFeDebug()) { + throw new RuntimeException("push down failed: " + slot + " is not in project \n" + + project.treeString()); + } else { + return project; + } + } + } + + PushDownAggContext newContext = createContextFromProject(project, context); + Plan newChild = project.child().accept(this, newContext); + if (newChild != project.child()) { + context.getFinalGroupKeys().addAll(newContext.getFinalGroupKeys()); + /* + * agg[sum(a), groupBy(b)] + * -> proj(a, b1+b2 as b) + * -> join(c = d) + * -> any(a, b1, b2, c,...) + * -> any(d, ...) + * => + * agg[sum(x), groupBy(b)] + * -> proj(x, b) + * -> join(c=d) + * ->agg[sum(a) as x, groupBy(b, c)] + * ->proj(a, b1+b2 as b, c, ...) + * -> any(a, b1, b2, c) + * -> any(d, ...) 
+ */ + Set aggFuncInputSlots = context.getAggFunctionsInputSlots(); + List newProjections = new ArrayList<>(); + for (NamedExpression ne : project.getProjects()) { + if (aggFuncInputSlots.contains(ne.toSlot())) { + // ne (a) is replaced by alias slot (x) + continue; + } else if (context.getFinalGroupKeys().contains(ne.toSlot())) { + newProjections.add(ne.toSlot()); + } else { + newProjections.add(ne); + } + } + for (Alias alias : context.getAliasMap().values()) { + newProjections.add(alias.toSlot()); + } + for (SlotReference key : context.getFinalGroupKeys()) { + if (!newProjections.contains(key)) { + newProjections.add(key); + } + } + + return project.withProjectsAndChild(newProjections, newChild); + } + + return project; + } + + /** + * An aggregation is never pushed through another aggregate; the node is returned unchanged. + */ + @Override + public Plan visitLogicalAggregate(LogicalAggregate agg, PushDownAggContext context) { + return agg; + } + + /** + * Generates the aggregate on top of the filter (subject to checkStats) instead of descending further. + */ + @Override + public Plan visitLogicalFilter(LogicalFilter filter, PushDownAggContext context) { + return genAggregate(filter, context); + } + + /** + * Generates the aggregate on top of the relation (subject to checkStats). + */ + @Override + public Plan visitLogicalRelation(LogicalRelation relation, PushDownAggContext context) { + return genAggregate(relation, context); + } + + /** + * Puts a normalized LogicalAggregate on top of child when checkStats approves it. The aggregate outputs + * the context's aliased aggregate functions followed by its group keys, and the slot of every group key + * is recorded in the context as a final group key. Returns child unchanged otherwise. + */ + private Plan genAggregate(Plan child, PushDownAggContext context) { + if (checkStats(child, context)) { + List aggOutputExpressions = new ArrayList<>(); + aggOutputExpressions.addAll(context.getAliasMap().values()); + aggOutputExpressions.addAll(context.getGroupKeys()); + for (NamedExpression key : context.getGroupKeys()) { + context.addFinalGroupKey((SlotReference) key.toSlot()); + } + LogicalAggregate genAgg = new LogicalAggregate(context.getGroupKeys(), aggOutputExpressions, child); + NormalizeAggregate normalizeAggregate = new NormalizeAggregate(); + return normalizeAggregate.normalizeAgg(genAgg, Optional.empty(), + context.getCascadesContext()); + } else { + return child; + } + } + + /** + * Decides whether an aggregate generated on top of plan is expected to pay off. + * + * Returns false when the context has not passed a big join or no ConnectContext is available. The session + * variable eagerAggregationMode then disables (negative) or forces (positive) the push down; for 0 the + * decision is estimated from the row count of plan and the ndv of each group key, which are classified + * by groupByCardinality into lower / medium / high cardinality. Unknown column statistics or an + * estimated row count of 0 reject the push down. + */ + private boolean checkStats(Plan plan, PushDownAggContext context) { + if (!context.isPassThroughBigJoin()) { + return false; + } + if 
(ConnectContext.get() == null) { + return false; + } + int mode = ConnectContext.get().getSessionVariable().eagerAggregationMode; + if (mode < 0) { + return false; + } + if (mode > 0) { + return true; + } + Statistics stats = plan.getStats(); + if (stats == null) { + stats = plan.accept(derive, new StatsDerive.DeriveContext()); + } + if (stats.getRowCount() == 0) { + return false; + } + + List groupKeysStats = new ArrayList<>(); + + List lower = Lists.newArrayList(); + List medium = Lists.newArrayList(); + List high = Lists.newArrayList(); + + // indexed by the class returned from groupByCardinality: 0 = lower, 1 = medium, 2 = high + List[] cards = new List[] {lower, medium, high}; + + for (NamedExpression key : context.getGroupKeys()) { + ColumnStatistic colStats = ExpressionEstimation.INSTANCE.estimate(key, stats); + if (colStats.isUnKnown) { + return false; + } + groupKeysStats.add(colStats); + cards[groupByCardinality(colStats, stats.getRowCount())].add(colStats); + } + + double lowerCartesian = 1.0; + for (ColumnStatistic colStats : lower) { + lowerCartesian = lowerCartesian * colStats.ndv; + } + + // pow(row_count/20, a half of lower column size) + // NOTE(review): lower.size() / 2 is an integer division, so 3 lower keys give an exponent of 1 + double lowerUpper = Math.max(stats.getRowCount() / 20, 1); + lowerUpper = Math.pow(lowerUpper, Math.max(lower.size() / 2, 1)); + + if (high.isEmpty() && (lower.size() + medium.size()) <= 2) { + return true; + } + + if (high.isEmpty() && medium.isEmpty()) { + // NOTE(review): lower.size() of 0, 1 or 2 already returned true in the check above, so only + // lower.size() >= 3 reaches this point and the first two branches are currently unreachable + if (lower.size() == 1 && lowerCartesian * 20 <= stats.getRowCount()) { + return true; + } else if (lower.size() == 2 && lowerCartesian * 7 <= stats.getRowCount()) { + return true; + } else if (lower.size() <= 3 && lowerCartesian * 20 <= stats.getRowCount() && lowerCartesian < lowerUpper) { + return true; + } else { + return false; + } + } + + if (high.size() >= 2 || medium.size() > 2 || (high.size() == 1 && !medium.isEmpty())) { + return false; + } + + // 3. Extremely low cardinality for lower with at most one medium or high. 
+ double lowerCartesianLowerBound = + stats.getRowCount() / LOWER_AGGREGATE_EFFECT_COEFFICIENT; + if (high.size() + medium.size() == 1 && lower.size() <= 2 && lowerCartesian <= lowerCartesianLowerBound) { + return true; + // StatsCalculator statsCalculator = new StatsCalculator(null); + // double estAggRowCount = statsCalculator.estimateGroupByRowCount(context.getGroupKeys(), stats); + // return estAggRowCount < lowerCartesianLowerBound; + } + + return false; + } + + // high(2): row_count / cardinality < MEDIUM_AGGREGATE_EFFECT_COEFFICIENT + // medium(1): row_count / cardinality >= MEDIUM_AGGREGATE_EFFECT_COEFFICIENT and < LOW_AGGREGATE_EFFECT_COEFFICIENT + // lower(0): row_count / cardinality >= LOW_AGGREGATE_EFFECT_COEFFICIENT + // the trailing return is only reached when no comparison holds (e.g. a NaN ndv) and is treated as high + private int groupByCardinality(ColumnStatistic colStats, double rowCount) { + if (rowCount == 0 || colStats.ndv * MEDIUM_AGGREGATE_EFFECT_COEFFICIENT > rowCount) { + return 2; + } else if (colStats.ndv * MEDIUM_AGGREGATE_EFFECT_COEFFICIENT <= rowCount + && colStats.ndv * LOW_AGGREGATE_EFFECT_COEFFICIENT > rowCount) { + return 1; + } else if (colStats.ndv * LOW_AGGREGATE_EFFECT_COEFFICIENT <= rowCount) { + return 0; + } + return 2; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java new file mode 100644 index 00000000000000..e9475bf7ac1d4c --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.eageraggregation; + +import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * State carried while an aggregation is pushed down the plan: the aggregate functions to evaluate, the + * group keys valid at the current plan node, the alias under which each function is output, and the group + * keys that were eventually used by a generated aggregate. + */ +public class PushDownAggContext { + // a join whose right child is estimated above this row count is treated as a big join + public static final int BIG_JOIN_BUILD_SIZE = 400_000; + private final List aggFunctions; + private final List groupKeys; + private final Map aliasMap; + private final Set aggFunctionsInputSlots; + + // the group keys that eventually used to generate aggregation node + private final LinkedHashSet finalGroupKeys = new LinkedHashSet<>(); + + // cascadesContext is used for normalizeAgg + private final CascadesContext cascadesContext; + + private final boolean passThroughBigJoin; + + /** + * Creates a context that has not passed a big join yet and derives one alias per aggregate function. + */ + public PushDownAggContext(List aggFunctions, + List groupKeys, + CascadesContext cascadesContext) { + this(aggFunctions, groupKeys, null, cascadesContext, false); + } + + /** + * Creates a context. + * + * @param aggFunctions aggregate functions to push down; copied into an immutable list + * @param groupKeys group keys valid at the current plan node; the list is stored as passed + * @param aliasMap alias of each aggregate function, or null to create an alias named after each function + * @param cascadesContext context handed to NormalizeAggregate when an aggregate is generated + * @param passThroughBigJoin whether a big join has been passed on the way down + */ + public PushDownAggContext(List aggFunctions, + List groupKeys, Map aliasMap, 
CascadesContext cascadesContext, + boolean passThroughBigJoin) { + this.groupKeys = groupKeys; + this.aggFunctions = ImmutableList.copyOf(aggFunctions); + this.cascadesContext = cascadesContext; + + if (aliasMap == null) { + // aggFunctions may contain equal functions (e.g. avg(a) expanded to sum(a) next to an explicit + // sum(a)). ImmutableMap.Builder.build() throws on duplicate keys, so collect the aliases in a + // LinkedHashMap first and keep the first alias of every function. + Map aliasByFunction = new LinkedHashMap<>(); + for (AggregateFunction aggFunction : this.aggFunctions) { + if (!aliasByFunction.containsKey(aggFunction)) { + aliasByFunction.put(aggFunction, new Alias(aggFunction, aggFunction.getName())); + } + } + this.aliasMap = ImmutableMap.copyOf(aliasByFunction); + } else { + this.aliasMap = aliasMap; + } + + this.aggFunctionsInputSlots = this.aggFunctions.stream() + .flatMap(aggFunction -> aggFunction.getInputSlots().stream()) + .filter(Slot.class::isInstance) + .collect(ImmutableSet.toImmutableSet()); + this.passThroughBigJoin = passThroughBigJoin; + } + + /** + * Returns a copy of this context marked as having passed a big join. The copy starts with an empty + * set of final group keys. + */ + public PushDownAggContext passThroughBigJoin() { + return new PushDownAggContext(aggFunctions, groupKeys, aliasMap, cascadesContext, true); + } + + public Map getAliasMap() { + return aliasMap; + } + + public List getAggFunctions() { + return aggFunctions; + } + + public List getGroupKeys() { + return groupKeys; + } + + /** + * Returns a copy of this context with the given group keys. The copy starts with an empty set of + * final group keys. + */ + public PushDownAggContext withGroupKeys(List groupKeys) { + return new PushDownAggContext(aggFunctions, groupKeys, aliasMap, cascadesContext, passThroughBigJoin); + } + + public Set getAggFunctionsInputSlots() { + return aggFunctionsInputSlots; + } + + /** + * Returns the live, mutable set of final group keys; callers add the keys of child contexts to it. + */ + public LinkedHashSet getFinalGroupKeys() { + return finalGroupKeys; + } + + public void addFinalGroupKey(SlotReference key) { + this.finalGroupKeys.add(key); + } + + public CascadesContext getCascadesContext() { + return cascadesContext; + } + + public boolean isPassThroughBigJoin() { + return passThroughBigJoin; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java new file mode 100644 index 00000000000000..221b234117ac4a --- /dev/null +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.rewrite.eageraggregation; + +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.analysis.NormalizeAggregate; +import org.apache.doris.nereids.rules.rewrite.AdjustNullable; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Cast; +import org.apache.doris.nereids.trees.expressions.Divide; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Avg; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.agg.Max; +import org.apache.doris.nereids.trees.expressions.functions.agg.Min; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum0; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.SessionVariable; + +import com.google.common.collect.Sets; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +/** + * Custom rewriter that pushes sum / count / avg / max / min aggregations below the joins of a + * project-filter-join-relation subtree, using EagerAggRewriter to generate the lower aggregate. + */ +public class PushDownAggregation extends DefaultPlanRewriter implements CustomRewriter { + private static final Logger LOG = LoggerFactory.getLogger(PushDownAggregation.class); + + public final EagerAggRewriter writer = new EagerAggRewriter(); + + // aggregate functions that may be pushed down + private final Set pushDownAggFunctionSet = Sets.newHashSet( + Sum.class, + Count.class, + Avg.class, + Max.class, + Min.class); + + // plan node types allowed in the subtree below the aggregate + private final Set acceptNodeType = Sets.newHashSet( + LogicalProject.class, + LogicalFilter.class, + LogicalRelation.class, + LogicalJoin.class); + + /** + * Entry point of the rule. The plan is returned untouched when no ConnectContext is available or the + * session variable eagerAggregationMode is negative. + */ + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + ConnectContext connectContext = ConnectContext.get();+ if (connectContext == null) { + // without a session EagerAggRewriter.checkStats rejects every push down anyway + return plan; + } + int mode = connectContext.getSessionVariable().eagerAggregationMode; + if (mode < 0) { + return plan; + } else { + return plan.accept(this, jobContext); + } + } + + /** + * Tries to push the aggregation below the joins of its child subtree. + * + * Lower aggregates are handled first; if the child changed, this aggregate is only re-attached. The push + * down is attempted only when the aggregate does not come from a repeat, every aggregate function is a + * non-distinct sum / count(expr) / avg / max / min, the child subtree contains a non-mark join and consists + * of project / filter / relation / join nodes only, and all group-by expressions are slots. avg(x) is + * split into sum(x) / count(x). On success the output expressions are rewritten over the slots of the + * pushed-down aggregate, and the aggregate is normalized and its nullability adjusted. + * + * @return the rewritten plan, or agg itself when nothing could be pushed down + */ + @Override + public Plan visitLogicalAggregate(LogicalAggregate agg, JobContext context) { + Plan newChild = agg.child().accept(this, context); + if (newChild != agg.child()) { + // TODO : push down upper aggregations + return agg.withChildren(newChild); + } + + if (agg.getSourceRepeat().isPresent()) { + return agg; + } + + List aggFunctions = new ArrayList<>(); + + Map avgToSumCountMap = new HashMap<>(); + for (AggregateFunction aggFunction : agg.getAggregateFunctions()) { + if (pushDownAggFunctionSet.contains(aggFunction.getClass()) + && !aggFunction.isDistinct() + && (!(aggFunction instanceof Count) || (!((Count) aggFunction).isCountStar()))) { + if (aggFunction instanceof Avg) { + DataType targetType = aggFunction.getDataType(); + Sum sum = new Sum(aggFunction.child(0)); + Count count = new Count(aggFunction.child(0)); + if (!aggFunctions.contains(sum)) { + aggFunctions.add(sum); + } + if (!aggFunctions.contains(count)) { + aggFunctions.add(count); + } + Expression castSum = targetType.equals(sum.getDataType()) ? 
sum : new Cast(sum, targetType); + Expression castCount = targetType.equals(count.getDataType()) ? count : new Cast(count, targetType); + avgToSumCountMap.put((Avg) aggFunction, + new Divide(castSum, castCount)); + } else if (!aggFunctions.contains(aggFunction)) { + // an equal sum/count may already have been added while expanding an avg + aggFunctions.add(aggFunction); + } + } else { + return agg; + } + } + + if (!checkSubTreePattern(agg.child())) { + return agg; + } + + List groupKeys = new ArrayList<>(); + for (Expression groupKey : agg.getGroupByExpressions()) { + if (groupKey instanceof SlotReference) { + groupKeys.add((SlotReference) groupKey); + } else { + if (SessionVariable.isFeDebug()) { + throw new RuntimeException("PushDownAggregation failed: agg is not normalized\n " + + agg.treeString()); + } else { + return agg; + } + } + } + + PushDownAggContext pushDownContext = new PushDownAggContext(new ArrayList<>(aggFunctions), + groupKeys, context.getCascadesContext()); + try { + Plan child = agg.child().accept(writer, pushDownContext); + if (child != agg.child()) { + // agg has been pushed down, rewrite agg output expressions + // before: agg[sum(A), by (B)] + // ->join(C=D) + // ->scan(T1[A...]) + // ->scan(T2) + // after: agg[sum(x), by(B)] + // ->join(C=D) + // ->agg[sum(A) as x, by(B,C)] + // ->scan(T1[A...]) + // ->scan(T2) + List newOutputExpressions = new ArrayList<>(); + for (NamedExpression ne : agg.getOutputExpressions()) { + if (ne instanceof SlotReference) { + newOutputExpressions.add(ne); + } else { + Expression rewriteAvgExpr = ExpressionUtils.replace(ne, avgToSumCountMap); + NamedExpression replaceAliasExpr = (NamedExpression) rewriteAvgExpr + .rewriteDownShortCircuit(e -> { + Alias alias = pushDownContext.getAliasMap().get(e); + if (alias == null) { + return e; + } + if (e instanceof Count) { + // the lower aggregate already produced partial counts; they have to be + // added up, counting them again would return the number of partial groups. + // sum0 keeps count's "0 instead of NULL" result + return new Sum0(alias.toSlot()); + } + AggregateFunction aggFunction = (AggregateFunction) e; + return aggFunction.withChildren(alias.toSlot()); + }); + newOutputExpressions.add(replaceAliasExpr); + } + } + LogicalAggregate eagerAgg = + agg.withAggOutputChild(newOutputExpressions, child); + NormalizeAggregate 
normalizeAggregate = new NormalizeAggregate(); + LogicalPlan normalized = normalizeAggregate.normalizeAgg(eagerAgg, Optional.empty(), + context.getCascadesContext()); + AdjustNullable adjustNullable = new AdjustNullable(false, false); + return adjustNullable.rewriteRoot(normalized, null); + } + } catch (RuntimeException e) { + // best effort: keep the original aggregate, but keep the stack trace for diagnosis + LOG.info("PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString(), e); + } + return agg; + } + + /** + * The subtree qualifies when it contains a non-mark join and only accepted node types. + */ + private boolean checkSubTreePattern(Plan root) { + return containsPushDownJoin(root) + && isSPJ(root); + } + + /** + * Returns true when root or any of its descendants is a join that is not a mark join. + */ + private boolean containsPushDownJoin(Plan root) { + if (root instanceof LogicalJoin && !((LogicalJoin) root).isMarkJoin()) { + return true; + } + if (root.children().isEmpty()) { + return false; + } + return root.children().stream().anyMatch(this::containsPushDownJoin); + } + + /** + * Returns true when root and all of its descendants are instances of a type in acceptNodeType. + */ + private boolean isSPJ(Plan root) { + boolean accepted = acceptNodeType.stream() + .anyMatch(clazz -> clazz.isAssignableFrom(root.getClass())); + if (!accepted) { + return false; + } + for (Plan child : root.children()) { + if (!isSPJ(child)) { + return false; + } + } + return true; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java new file mode 100644 index 00000000000000..292caf230d0ca2 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java @@ -0,0 +1,152 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.eageraggregation; + +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.functions.scalar.If; +import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.qe.SessionVariable; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +/** + * Pushes down aggregates whose outputs all have the shape sum(if(a = b, slot, null)): + * sum(if t1.a then t2.b) + */ +public class PushdownSumIfAggregation extends DefaultPlanRewriter implements CustomRewriter { + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + return plan.accept(this, jobContext); + } + + /** + * Rewrites an aggregate whose every aliased output is a non-distinct sum(if(equalTo, slot, NULL)). + * + * Lower aggregates are handled first; if the child changed, this aggregate is only re-attached. When + * SumAggWriter changes the child, each sum(if(cond, slot, NULL)) output is rebuilt over the child output + * slot that has the same expression id as the original "then" slot. + * + * @return the rewritten aggregate, or agg itself when the pattern does not match or nothing was pushed down + */ + @Override + public Plan visitLogicalAggregate(LogicalAggregate agg, JobContext context) { + Plan newChild = agg.child().accept(this, 
context); + if (newChild != agg.child()) { + // TODO : push down upper aggregations + return agg.withChildren(newChild); + } + + if (agg.getSourceRepeat().isPresent()) { + return agg; + } + + List aliasToBePushDown = Lists.newArrayList(); + List ifConditions = Lists.newArrayList(); + List ifThenSlots = Lists.newArrayList(); + boolean patternMatch = true; + for (NamedExpression aggOutput : agg.getOutputExpressions()) { + if (aggOutput instanceof Alias) { + Expression body = aggOutput.child(0); + // a DISTINCT sum must not be pre-aggregated, and the Sum rebuilt below would lose the flag + if (body instanceof Sum && !((Sum) body).isDistinct()) { + Expression sumBody = ((Sum) body).child(); + if (sumBody instanceof If) { + If ifBody = (If) sumBody; + if (ifBody.child(0) instanceof EqualTo + && ifBody.child(1) instanceof SlotReference + && ifBody.child(2) instanceof NullLiteral) { + ifConditions.add((EqualTo) ifBody.child(0)); + ifThenSlots.add((SlotReference) ifBody.child(1)); + aliasToBePushDown.add(aggOutput); + continue; + } + } + } + patternMatch = false; + // one mismatching output is enough to give up + break; + } + } + if (!patternMatch) { + return agg; + } + if (ifThenSlots.isEmpty()) { + return agg; + } + ifThenSlots = Lists.newArrayList(Sets.newHashSet(ifThenSlots)); + + List groupKeys = new ArrayList<>(); + for (Expression groupKey : agg.getGroupByExpressions()) { + if (groupKey instanceof SlotReference) { + groupKeys.add((SlotReference) groupKey); + } else { + if (SessionVariable.isFeDebug()) { + throw new RuntimeException("PushdownSumIfAggregation failed: agg is not normalized\n " + + agg.treeString()); + } else { + return agg; + } + } + } + + SumAggContext sumAggContext = new SumAggContext(aliasToBePushDown, ifConditions, ifThenSlots, groupKeys); + SumAggWriter writer = new SumAggWriter(); + Plan child = agg.child().accept(writer, sumAggContext); + if (child != agg.child()) { + List outputExpressions = agg.getOutputExpressions(); + List newOutputExpressions = new ArrayList<>(); + for (NamedExpression output : outputExpressions) { + if (output instanceof SlotReference) { + newOutputExpressions.add(output); + } else if (output instanceof 
Alias + && output.child(0) instanceof Sum + && output.child(0).child(0) instanceof If + && output.child(0).child(0).child(1) instanceof SlotReference) { + SlotReference targetSlot = (SlotReference) output.child(0).child(0).child(1); + // find the child output that carries the same expression id as the original "then" slot + Slot toReplace = null; + for (Slot slot : child.getOutput()) { + if (slot.getExprId().equals(targetSlot.getExprId())) { + toReplace = slot; + } + } + if (toReplace != null) { + Alias newOutput = (Alias) ((Alias) output).withChildren( + new Sum( + new If( + output.child(0).child(0).child(0), + toReplace, + new NullLiteral(toReplace.getDataType()) + ) + ) + ); + newOutputExpressions.add(newOutput); + } else { + return agg; + } + + } else { + // an output of a shape this rule does not handle would otherwise be silently dropped + return agg; + } + } + return agg.withAggOutputChild(newOutputExpressions, child); + } + return agg; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java new file mode 100644 index 00000000000000..7b3e7ee948276c --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +package org.apache.doris.nereids.rules.rewrite.eageraggregation; + +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.SlotReference; + +import com.google.common.collect.ImmutableList; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Immutable holder of what PushdownSumIfAggregation pushes down: the aliased outputs, the conditions and + * "then" slots of their if expressions, and the group keys. + */ +public class SumAggContext { + public final List aliasToBePushDown; + public final List ifConditions; + public final List ifThenSlots; + public final List groupKeys; + + /** + * Copies every argument into an immutable list; duplicate "then" slots are collapsed through a HashSet. + */ + public SumAggContext(List aliasToBePushDown, + List ifConditions, List ifThenSlots, + List groupKeys) { + Set uniqueThenSlots = new HashSet<>(ifThenSlots); + this.ifThenSlots = ImmutableList.copyOf(uniqueThenSlots); + this.aliasToBePushDown = ImmutableList.copyOf(aliasToBePushDown); + this.ifConditions = ImmutableList.copyOf(ifConditions); + this.groupKeys = ImmutableList.copyOf(groupKeys); + } + + /** + * Returns a copy of this context that carries the given "then" slots. + */ + public SumAggContext withIfThenSlots(List ifThenSlots) { + return new SumAggContext(aliasToBePushDown, ifConditions, ifThenSlots, groupKeys); + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java new file mode 100644 index 00000000000000..27a2165f42233a --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java @@ -0,0 +1,323 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.eageraggregation; + +import org.apache.doris.nereids.rules.rewrite.StatsDerive; +import org.apache.doris.nereids.stats.ExpressionEstimation; +import org.apache.doris.nereids.stats.StatsCalculator; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.ColumnStatistic; +import org.apache.doris.statistics.Statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import 
java.util.stream.Collectors; + +/** + * SumAggWriter + */ +public class SumAggWriter extends DefaultPlanRewriter { + private static final double LOWER_AGGREGATE_EFFECT_COEFFICIENT = 10000; + private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000; + private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100; + private final StatsDerive derive = new StatsDerive(true); + + @Override + public Plan visit(Plan plan, SumAggContext context) { + return plan; + } + + @Override + public Plan visitLogicalProject(LogicalProject project, SumAggContext context) { + if (project.getProjects().stream().allMatch(proj -> proj instanceof SlotReference + || (proj instanceof Alias && proj.child(0) instanceof SlotReference))) { + List slotToPush = new ArrayList<>(); + for (SlotReference slot : context.ifThenSlots) { + slotToPush.add((SlotReference) project.pushDownExpressionPastProject(slot)); + } + List groupBySlots = new ArrayList<>(); + for (SlotReference slot : context.groupKeys) { + groupBySlots.add((SlotReference) project.pushDownExpressionPastProject(slot)); + } + SumAggContext contextForChild = new SumAggContext( + context.aliasToBePushDown, + context.ifConditions, + slotToPush, + groupBySlots); + Plan child = project.child().accept(this, contextForChild); + if (child != project.child()) { + List newProjects = Lists.newArrayList(); + for (NamedExpression ne : project.getProjects()) { + newProjects.add((NamedExpression) replaceBySlots(ne, child.getOutput())); + } + return project.withProjects(newProjects).withChildren(child); + } + } + return project; + } + + private static Expression replaceBySlots(Expression expression, List slots) { + Map replaceMap = new HashMap<>(); + for (Slot slot1 : expression.getInputSlots()) { + for (Slot slot2 : slots) { + if (slot1.getExprId().asInt() == slot2.getExprId().asInt()) { + replaceMap.put(slot1, slot2); + } + } + } + Expression result = ExpressionUtils.replace(expression, replaceMap); + return result; + } + + @Override 
+ public Plan visitLogicalJoin(LogicalJoin join, SumAggContext context) { + Set leftOutput = join.left().getOutputSet(); + Set conditionSlots = join.getConditionSlot().stream() + .map(slot -> (SlotReference) slot).collect(Collectors.toSet()); + for (Slot slot : context.ifThenSlots) { + if (conditionSlots.contains(slot)) { + return join; + } + } + Set conditionSlotsFromLeft = Sets.newHashSet(conditionSlots); + conditionSlotsFromLeft.retainAll(leftOutput); + for (SlotReference slot : context.groupKeys) { + if (leftOutput.contains(slot)) { + conditionSlotsFromLeft.add(slot); + } + } + if (leftOutput.containsAll(context.ifThenSlots)) { + SumAggContext contextForChild = new SumAggContext( + context.aliasToBePushDown, + context.ifConditions, + context.ifThenSlots, + Lists.newArrayList(conditionSlotsFromLeft) + ); + Plan left = join.left().accept(this, contextForChild); + if (join.left() != left) { + return join.withChildren(left, join.right()); + } + } + return join; + } + + @Override + public Plan visitLogicalUnion(LogicalUnion union, SumAggContext context) { + if (!union.getOutputSet().containsAll(context.ifThenSlots)) { + return union; + } + if (!union.getConstantExprsList().isEmpty()) { + return union; + } + + if (!union.getOutputs().stream().allMatch(e -> e instanceof SlotReference)) { + return union; + } + List newChildren = Lists.newArrayList(); + + boolean changed = false; + for (int i = 0; i < union.children().size(); i++) { + Plan child = union.children().get(i); + List ifThenSlotsForChild = new ArrayList<>(); + // List groupByForChild = new ArrayList<>(); + for (SlotReference slot : context.ifThenSlots) { + Expression pushed = union.pushDownExpressionPastSetOperator(slot, i); + if (pushed instanceof SlotReference) { + ifThenSlotsForChild.add((SlotReference) pushed); + } else { + return union; + } + } + int childIdx = i; + SumAggContext contextForChild = new SumAggContext( + context.aliasToBePushDown, + context.ifConditions, + ifThenSlotsForChild, + 
context.groupKeys.stream().map(slot + -> (SlotReference) union.pushDownExpressionPastSetOperator(slot, childIdx)) + .collect(Collectors.toList()) + ); + Plan newChild = child.accept(this, contextForChild); + if (newChild != child) { + changed = true; + } + newChildren.add(newChild); + } + if (changed) { + List> newRegularChildrenOutputs = Lists.newArrayList(); + for (int i = 0; i < newChildren.size(); i++) { + List childOutput = new ArrayList<>(); + for (SlotReference slot : union.getRegularChildOutput(i)) { + for (Slot c : newChildren.get(i).getOutput()) { + if (slot.equals(c)) { + childOutput.add((SlotReference) c); + break; + } + } + } + newRegularChildrenOutputs.add(childOutput); + } + List newOutputs = new ArrayList<>(); + for (int i = 0; i < union.getOutput().size(); i++) { + SlotReference originSlot = (SlotReference) union.getOutput().get(i); + DataType dataType = newRegularChildrenOutputs.get(0).get(i).getDataType(); + newOutputs.add(originSlot.withNullableAndDataType(originSlot.nullable(), dataType)); + } + return union.withChildrenAndOutputs(newChildren, newOutputs, newRegularChildrenOutputs); + } else { + return union; + } + } + + @Override + public Plan visitLogicalRelation(LogicalRelation relation, SumAggContext context) { + return genAggregate(relation, context); + } + + private Plan genAggregate(Plan child, SumAggContext context) { + if (checkStats(child, context)) { + List aggOutputExpressions = new ArrayList<>(); + for (SlotReference slot : context.ifThenSlots) { + Alias alias = new Alias(slot.getExprId(), new Sum(slot)); + aggOutputExpressions.add(alias); + } + aggOutputExpressions.addAll(context.groupKeys); + + LogicalAggregate genAgg = new LogicalAggregate(context.groupKeys, aggOutputExpressions, child); + return genAgg; + } else { + return child; + } + + } + + private boolean checkStats(Plan plan, SumAggContext context) { + if (ConnectContext.get() == null) { + return false; + } + int mode = 
ConnectContext.get().getSessionVariable().eagerAggregationMode; + if (mode < 0) { + return false; + } + if (mode > 0) { + return true; + } + Statistics stats = plan.getStats(); + if (stats == null) { + stats = plan.accept(derive, new StatsDerive.DeriveContext()); + } + if (stats.getRowCount() == 0) { + return false; + } + + List groupKeysStats = new ArrayList<>(); + + List lower = Lists.newArrayList(); + List medium = Lists.newArrayList(); + List high = Lists.newArrayList(); + + List[] cards = new List[] {lower, medium, high}; + + for (NamedExpression key : context.groupKeys) { + ColumnStatistic colStats = ExpressionEstimation.INSTANCE.estimate(key, stats); + if (colStats.isUnKnown) { + return false; + } + groupKeysStats.add(colStats); + cards[groupByCardinality(colStats, stats.getRowCount())].add(colStats); + } + + double lowerCartesian = 1.0; + for (ColumnStatistic colStats : lower) { + lowerCartesian = lowerCartesian * colStats.ndv; + } + + // pow(row_count/20, a half of lower column size) + double lowerUpper = Math.max(stats.getRowCount() / 20, 1); + lowerUpper = Math.pow(lowerUpper, Math.max(lower.size() / 2, 1)); + + if (high.isEmpty() && (lower.size() + medium.size()) == 1) { + return true; + } + + if (high.isEmpty() && medium.isEmpty()) { + if (lower.size() == 1 && lowerCartesian * 20 <= stats.getRowCount()) { + return true; + } else if (lower.size() == 2 && lowerCartesian * 7 <= stats.getRowCount()) { + return true; + } else if (lower.size() <= 3 && lowerCartesian * 20 <= stats.getRowCount() && lowerCartesian < lowerUpper) { + return true; + } else { + return false; + } + } + + if (high.size() >= 2 || medium.size() > 2 || (high.size() == 1 && !medium.isEmpty())) { + return false; + } + + // 3. Extremely low cardinality for lower with at most one medium or high. 
+ double lowerCartesianLowerBound = + stats.getRowCount() / LOWER_AGGREGATE_EFFECT_COEFFICIENT; + if (high.size() + medium.size() == 1 && lower.size() <= 2 && lowerCartesian <= lowerCartesianLowerBound) { + StatsCalculator statsCalculator = new StatsCalculator(null); + double estAggRowCount = statsCalculator.estimateGroupByRowCount( + context.groupKeys.stream().map(s -> (Expression) s).collect(Collectors.toList()), + stats); + return estAggRowCount < lowerCartesianLowerBound; + } + + return false; + } + + // high(2): row_count / cardinality < MEDIUM_AGGREGATE_EFFECT_COEFFICIENT + // medium(1): row_count / cardinality >= MEDIUM_AGGREGATE_EFFECT_COEFFICIENT and < LOW_AGGREGATE_EFFECT_COEFFICIENT + // lower(0): row_count / cardinality >= LOW_AGGREGATE_EFFECT_COEFFICIENT + private int groupByCardinality(ColumnStatistic colStats, double rowCount) { + if (rowCount == 0 || colStats.ndv * MEDIUM_AGGREGATE_EFFECT_COEFFICIENT > rowCount) { + return 2; + } else if (colStats.ndv * MEDIUM_AGGREGATE_EFFECT_COEFFICIENT <= rowCount + && colStats.ndv * LOW_AGGREGATE_EFFECT_COEFFICIENT > rowCount) { + return 1; + } else if (colStats.ndv * LOW_AGGREGATE_EFFECT_COEFFICIENT <= rowCount) { + return 0; + } + return 2; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index a6e620b8efc839..020c0dd3d6a89a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -120,7 +120,7 @@ public class ExpressionEstimation extends ExpressionVisitor parentProjects) { public Optional processProject(List parentProjects) { return Optional.of(PushProjectThroughUnion.doPushProject(parentProjects, this)); } + + /** + * Push down expression past SetOperation to a specific child. 
+ * + * This method maps the expression from the SetOperation's output slots + * to the corresponding child's output slots. + * + * Example: + * SetOperation outputs: [x, y] + * Child 0 outputs (regularChildrenOutputs[0]): [a, b] + * Child 1 outputs (regularChildrenOutputs[1]): [c, d] + * + * If expression is "x + 1": + * - For childIdx=0, return "a + 1" + * - For childIdx=1, return "c + 1" + * + * @param expression the expression to push down + * @param childIdx the index of the child to push down to + * @return the rewritten expression for the child, or null if childIdx is out of + * bounds + */ + public Expression pushDownExpressionPastSetOperator(Expression expression, int childIdx) { + // Check if childIdx is valid + if (childIdx < 0 || childIdx >= regularChildrenOutputs.size()) { + return null; + } + + // Build mapping from SetOperation output slots to child output slots + java.util.HashMap slotMapping = new java.util.HashMap<>(); + List childOutputs = regularChildrenOutputs.get(childIdx); + + // Map each output slot to the corresponding child slot + for (int i = 0; i < outputs.size() && i < childOutputs.size(); i++) { + Slot outputSlot = outputs.get(i).toSlot(); + SlotReference childSlot = childOutputs.get(i); + slotMapping.put(outputSlot, childSlot); + } + + // Replace slots in the expression using the mapping + return ExpressionUtils.replace(expression, slotMapping); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java index 1a1b3b2e84e639..7dc1507cc7a24b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java @@ -424,4 +424,12 @@ private static List castToCommonType(List row, } return changed ? 
castedRow.build() : row; } + + public LogicalSetOperation withChildrenAndOutputs(List children, List newOuptuts, + List> childrenOutputs) { + Preconditions.checkArgument(children.size() == childrenOutputs.size(), + "children size %s is not equals with children outputs size %s", + children.size(), childrenOutputs.size()); + return new LogicalUnion(qualifier, newOuptuts, childrenOutputs, constantExprsList, hasPushedFilter, children); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 818533a54f71a5..38f9e252f26d5d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2183,6 +2183,15 @@ public boolean isEnableHboNonStrictMatchingMode() { @VariableMgr.VarAttr(name = DPHYPER_LIMIT) public int dphyperLimit = 1000; + @VariableMgr.VarAttr(name = "eager_aggregation_mode", needForward = true, + description = {"0: 根据统计信息决定是使用eager aggregation," + + "1: 强制使用 eager aggregation," + + "-1: 禁止使用 eager aggregation", + "0: Determine eager aggregation by statistics, " + + "1: force eager aggregation, " + + "-1: Prohibit eager aggregation "} + ) + public int eagerAggregationMode = 0; @VariableMgr.VarAttr( name = ENABLE_PAGE_CACHE, diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out index b9857d349977f8..2522a1a9f3f342 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out @@ -6,16 +6,22 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ------PhysicalDistribute[DistributionSpecHash] --------hashAgg[LOCAL] ----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk,ws_sold_date_sk] 
---------------PhysicalUnion -----------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------PhysicalProject ---------------------PhysicalOlapScan[web_sales] apply RFs: RF0 -----------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------PhysicalProject ---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 +------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=() build RFs:RF0 sold_date_sk->[d_date_sk] --------------PhysicalProject -----------------PhysicalOlapScan[date_dim] +----------------PhysicalOlapScan[date_dim] apply RFs: RF0 +--------------PhysicalUnion +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[web_sales] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[catalog_sales] --PhysicalResultSink ----PhysicalQuickSort[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out index f514575b3964ce..c38ee45f762950 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out @@ -53,29 +53,29 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------hashAgg[LOCAL] ----------------PhysicalUnion ------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF5 cs_item_sk->[item_sk] 
-----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 +--------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF5 item_sk->[cs_item_sk] ----------------------PhysicalProject ------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[cs_bill_customer_sk] --------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4 +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4 RF5 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) ----------------------------------PhysicalOlapScan[date_dim] --------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF8 ws_item_sk->[item_sk] -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF8 +--------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF8 item_sk->[ws_item_sk] ----------------------PhysicalProject ------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF7 c_customer_sk->[ws_bill_customer_sk] 
--------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ws_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 RF8 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) ----------------------------------PhysicalOlapScan[date_dim] --------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query3.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query3.out index e9c6ec79c33e7b..fae84ff1a42849 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query3.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query3.out @@ -9,15 +9,18 @@ PhysicalResultSink ------------PhysicalDistribute[DistributionSpecHash] --------------hashAgg[LOCAL] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +------------------------hashAgg[GLOBAL] 
+--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ------------------------PhysicalProject ---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -------------------------PhysicalProject ---------------------------filter((item.i_manufact_id = 816)) -----------------------------PhysicalOlapScan[item] +--------------------------filter((dt.d_moy = 11)) +----------------------------PhysicalOlapScan[date_dim(dt)] --------------------PhysicalProject -----------------------filter((dt.d_moy = 11)) -------------------------PhysicalOlapScan[date_dim(dt)] +----------------------filter((item.i_manufact_id = 816)) +------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query31.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query31.out index 56ccb985a8c645..ddd91f4702c00c 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query31.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query31.out @@ -9,9 +9,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------PhysicalProject --------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] ---------------------PhysicalProject -----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------hashAgg[GLOBAL] 
+----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 --------------------PhysicalProject ----------------------filter((ss.d_year = 1999) and d_qoy IN (1, 2, 3)) ------------------------PhysicalOlapScan[date_dim] @@ -26,9 +29,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] -----------------------PhysicalProject -------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 ----------------------PhysicalProject ------------------------filter((ws.d_year = 1999) and d_qoy IN (1, 2, 3)) --------------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query42.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query42.out index 939c2713d64d44..68f93698b9a036 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query42.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query42.out @@ -11,9 +11,12 @@ PhysicalResultSink ----------------PhysicalProject 
------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ------------------------PhysicalProject --------------------------filter((item.i_manager_id = 1)) ----------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query52.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query52.out index e3177f96cfd44c..5401ac3a92a539 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query52.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query52.out @@ -11,9 +11,12 @@ PhysicalResultSink ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ------------------------PhysicalProject --------------------------filter((item.i_manager_id = 1)) ----------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query55.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query55.out index 652a5dab8d16b2..b21e9d417e36dc 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query55.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query55.out @@ -11,9 +11,12 @@ PhysicalResultSink ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ------------------------PhysicalProject 
--------------------------filter((item.i_manager_id = 52)) ----------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query59.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query59.out index f0567b8768bf8f..050957da0f0a7a 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query59.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query59.out @@ -6,9 +6,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute[DistributionSpecHash] --------hashAgg[LOCAL] ----------PhysicalProject -------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] ---------------PhysicalProject -----------------PhysicalOlapScan[store_sales] apply RFs: RF0 +------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 --------------PhysicalProject ----------------PhysicalOlapScan[date_dim] --PhysicalResultSink From 649220ff16e0b7f2cc6962d50a33fba619cf841d Mon Sep 17 00:00:00 2001 From: englefly Date: Mon, 5 Jan 2026 15:00:34 +0800 Subject: [PATCH 02/21] shape with/without pkfk based on tpc_preview --- .../PushdownSumIfAggregation.java | 1 - .../tpcds_sf1000/shape/query23.out | 12 +- .../tpcds_sf1000/shape/query64.out | 129 +- .../tpcds_sf1000_nopkfk/shape/query1.out | 37 + .../tpcds_sf1000_nopkfk/shape/query10.out | 47 + .../tpcds_sf1000_nopkfk/shape/query11.out | 63 + .../tpcds_sf1000_nopkfk/shape/query12.out | 26 + .../tpcds_sf1000_nopkfk/shape/query13.out | 34 + .../tpcds_sf1000_nopkfk/shape/query14.out | 148 + 
.../tpcds_sf1000_nopkfk/shape/query15.out | 25 + .../tpcds_sf1000_nopkfk/shape/query16.out | 35 + .../tpcds_sf1000_nopkfk/shape/query17.out | 44 + .../tpcds_sf1000_nopkfk/shape/query18.out | 42 + .../tpcds_sf1000_nopkfk/shape/query19.out | 35 + .../tpcds_sf1000_nopkfk/shape/query2.out | 45 + .../tpcds_sf1000_nopkfk/shape/query20.out | 26 + .../tpcds_sf1000_nopkfk/shape/query21.out | 26 + .../tpcds_sf1000_nopkfk/shape/query22.out | 23 + .../tpcds_sf1000_nopkfk/shape/query23.out | 89 + .../tpcds_sf1000_nopkfk/shape/query24.out | 52 + .../tpcds_sf1000_nopkfk/shape/query25.out | 43 + .../tpcds_sf1000_nopkfk/shape/query26.out | 31 + .../tpcds_sf1000_nopkfk/shape/query27.out | 33 + .../tpcds_sf1000_nopkfk/shape/query28.out | 75 + .../tpcds_sf1000_nopkfk/shape/query29.out | 43 + .../tpcds_sf1000_nopkfk/shape/query3.out | 26 + .../tpcds_sf1000_nopkfk/shape/query30.out | 41 + .../tpcds_sf1000_nopkfk/shape/query31.out | 73 + .../tpcds_sf1000_nopkfk/shape/query32.out | 26 + .../tpcds_sf1000_nopkfk/shape/query33.out | 83 + .../tpcds_sf1000_nopkfk/shape/query34.out | 32 + .../tpcds_sf1000_nopkfk/shape/query35.out | 47 + .../tpcds_sf1000_nopkfk/shape/query36.out | 33 + .../tpcds_sf1000_nopkfk/shape/query37.out | 27 + .../tpcds_sf1000_nopkfk/shape/query38.out | 62 + .../tpcds_sf1000_nopkfk/shape/query39.out | 31 + .../tpcds_sf1000_nopkfk/shape/query4.out | 91 + .../tpcds_sf1000_nopkfk/shape/query40.out | 30 + .../tpcds_sf1000_nopkfk/shape/query41.out | 23 + .../tpcds_sf1000_nopkfk/shape/query42.out | 26 + .../tpcds_sf1000_nopkfk/shape/query43.out | 22 + .../tpcds_sf1000_nopkfk/shape/query44.out | 71 + .../tpcds_sf1000_nopkfk/shape/query45.out | 35 + .../tpcds_sf1000_nopkfk/shape/query46.out | 38 + .../tpcds_sf1000_nopkfk/shape/query47.out | 43 + .../tpcds_sf1000_nopkfk/shape/query48.out | 29 + .../tpcds_sf1000_nopkfk/shape/query49.out | 107 + .../tpcds_sf1000_nopkfk/shape/query5.out | 76 + .../tpcds_sf1000_nopkfk/shape/query50.out | 29 + .../tpcds_sf1000_nopkfk/shape/query51.out 
| 40 + .../tpcds_sf1000_nopkfk/shape/query52.out | 26 + .../tpcds_sf1000_nopkfk/shape/query53.out | 31 + .../tpcds_sf1000_nopkfk/shape/query54.out | 74 + .../tpcds_sf1000_nopkfk/shape/query55.out | 26 + .../tpcds_sf1000_nopkfk/shape/query56.out | 83 + .../tpcds_sf1000_nopkfk/shape/query57.out | 43 + .../tpcds_sf1000_nopkfk/shape/query58.out | 86 + .../tpcds_sf1000_nopkfk/shape/query59.out | 45 + .../tpcds_sf1000_nopkfk/shape/query6.out | 47 + .../tpcds_sf1000_nopkfk/shape/query60.out | 83 + .../tpcds_sf1000_nopkfk/shape/query61.out | 70 + .../tpcds_sf1000_nopkfk/shape/query62.out | 29 + .../tpcds_sf1000_nopkfk/shape/query63.out | 31 + .../tpcds_sf1000_nopkfk/shape/query64.out | 102 + .../tpcds_sf1000_nopkfk/shape/query65.out | 43 + .../tpcds_sf1000_nopkfk/shape/query66.out | 61 + .../tpcds_sf1000_nopkfk/shape/query67.out | 32 + .../tpcds_sf1000_nopkfk/shape/query68.out | 40 + .../tpcds_sf1000_nopkfk/shape/query69.out | 47 + .../tpcds_sf1000_nopkfk/shape/query7.out | 31 + .../tpcds_sf1000_nopkfk/shape/query70.out | 44 + .../tpcds_sf1000_nopkfk/shape/query71.out | 36 + .../tpcds_sf1000_nopkfk/shape/query72.out | 58 + .../tpcds_sf1000_nopkfk/shape/query73.out | 32 + .../tpcds_sf1000_nopkfk/shape/query74.out | 63 + .../tpcds_sf1000_nopkfk/shape/query75.out | 68 + .../tpcds_sf1000_nopkfk/shape/query76.out | 38 + .../tpcds_sf1000_nopkfk/shape/query77.out | 101 + .../tpcds_sf1000_nopkfk/shape/query78.out | 57 + .../tpcds_sf1000_nopkfk/shape/query79.out | 32 + .../tpcds_sf1000_nopkfk/shape/query8.out | 47 + .../tpcds_sf1000_nopkfk/shape/query80.out | 100 + .../tpcds_sf1000_nopkfk/shape/query81.out | 41 + .../tpcds_sf1000_nopkfk/shape/query82.out | 27 + .../tpcds_sf1000_nopkfk/shape/query83.out | 80 + .../tpcds_sf1000_nopkfk/shape/query84.out | 31 + .../tpcds_sf1000_nopkfk/shape/query85.out | 46 + .../tpcds_sf1000_nopkfk/shape/query86.out | 28 + .../tpcds_sf1000_nopkfk/shape/query87.out | 60 + .../tpcds_sf1000_nopkfk/shape/query88.out | 171 ++ 
.../tpcds_sf1000_nopkfk/shape/query89.out | 31 + .../tpcds_sf1000_nopkfk/shape/query9.out | 115 + .../tpcds_sf1000_nopkfk/shape/query90.out | 47 + .../tpcds_sf1000_nopkfk/shape/query91.out | 41 + .../tpcds_sf1000_nopkfk/shape/query92.out | 25 + .../tpcds_sf1000_nopkfk/shape/query93.out | 21 + .../tpcds_sf1000_nopkfk/shape/query94.out | 35 + .../tpcds_sf1000_nopkfk/shape/query95.out | 44 + .../tpcds_sf1000_nopkfk/shape/query96.out | 26 + .../tpcds_sf1000_nopkfk/shape/query97.out | 35 + .../tpcds_sf1000_nopkfk/shape/query98.out | 26 + .../tpcds_sf1000_nopkfk/shape/query99.out | 29 + .../tpcds_sf1000/shape/query64.groovy | 242 +- .../tpcds_sf1000_nopkfk/load.groovy | 2520 +++++++++++++++++ .../tpcds_sf1000_nopkfk/shape/query1.groovy | 86 + .../tpcds_sf1000_nopkfk/shape/query10.groovy | 154 + .../tpcds_sf1000_nopkfk/shape/query11.groovy | 198 ++ .../tpcds_sf1000_nopkfk/shape/query12.groovy | 104 + .../tpcds_sf1000_nopkfk/shape/query13.groovy | 140 + .../tpcds_sf1000_nopkfk/shape/query14.groovy | 244 ++ .../tpcds_sf1000_nopkfk/shape/query15.groovy | 76 + .../tpcds_sf1000_nopkfk/shape/query16.groovy | 98 + .../tpcds_sf1000_nopkfk/shape/query17.groovy | 126 + .../tpcds_sf1000_nopkfk/shape/query18.groovy | 104 + .../tpcds_sf1000_nopkfk/shape/query19.groovy | 86 + .../tpcds_sf1000_nopkfk/shape/query2.groovy | 156 + .../tpcds_sf1000_nopkfk/shape/query20.groovy | 96 + .../tpcds_sf1000_nopkfk/shape/query21.groovy | 97 + .../tpcds_sf1000_nopkfk/shape/query22.groovy | 76 + .../tpcds_sf1000_nopkfk/shape/query23.groovy | 143 + .../tpcds_sf1000_nopkfk/shape/query24.groovy | 146 + .../tpcds_sf1000_nopkfk/shape/query25.groovy | 132 + .../tpcds_sf1000_nopkfk/shape/query26.groovy | 78 + .../tpcds_sf1000_nopkfk/shape/query27.groovy | 82 + .../tpcds_sf1000_nopkfk/shape/query28.groovy | 142 + .../tpcds_sf1000_nopkfk/shape/query29.groovy | 130 + .../tpcds_sf1000_nopkfk/shape/query3.groovy | 78 + .../tpcds_sf1000_nopkfk/shape/query30.groovy | 98 + 
.../tpcds_sf1000_nopkfk/shape/query31.groovy | 140 + .../tpcds_sf1000_nopkfk/shape/query32.groovy | 95 + .../tpcds_sf1000_nopkfk/shape/query33.groovy | 186 ++ .../tpcds_sf1000_nopkfk/shape/query34.groovy | 98 + .../tpcds_sf1000_nopkfk/shape/query35.groovy | 152 + .../tpcds_sf1000_nopkfk/shape/query36.groovy | 96 + .../tpcds_sf1000_nopkfk/shape/query37.groovy | 70 + .../tpcds_sf1000_nopkfk/shape/query38.groovy | 85 + .../tpcds_sf1000_nopkfk/shape/query39.groovy | 90 + .../tpcds_sf1000_nopkfk/shape/query4.groovy | 268 ++ .../tpcds_sf1000_nopkfk/shape/query40.groovy | 92 + .../tpcds_sf1000_nopkfk/shape/query41.groovy | 140 + .../tpcds_sf1000_nopkfk/shape/query42.groovy | 80 + .../tpcds_sf1000_nopkfk/shape/query43.groovy | 74 + .../tpcds_sf1000_nopkfk/shape/query44.groovy | 106 + .../tpcds_sf1000_nopkfk/shape/query45.groovy | 76 + .../tpcds_sf1000_nopkfk/shape/query46.groovy | 106 + .../tpcds_sf1000_nopkfk/shape/query47.groovy | 138 + .../tpcds_sf1000_nopkfk/shape/query48.groovy | 170 ++ .../tpcds_sf1000_nopkfk/shape/query49.groovy | 294 ++ .../tpcds_sf1000_nopkfk/shape/query5.groovy | 292 ++ .../tpcds_sf1000_nopkfk/shape/query50.groovy | 154 + .../tpcds_sf1000_nopkfk/shape/query51.groovy | 126 + .../tpcds_sf1000_nopkfk/shape/query52.groovy | 80 + .../tpcds_sf1000_nopkfk/shape/query53.groovy | 92 + .../tpcds_sf1000_nopkfk/shape/query54.groovy | 148 + .../tpcds_sf1000_nopkfk/shape/query55.groovy | 64 + .../tpcds_sf1000_nopkfk/shape/query56.groovy | 174 ++ .../tpcds_sf1000_nopkfk/shape/query57.groovy | 132 + .../tpcds_sf1000_nopkfk/shape/query58.groovy | 166 ++ .../tpcds_sf1000_nopkfk/shape/query59.groovy | 124 + .../tpcds_sf1000_nopkfk/shape/query6.groovy | 88 + .../tpcds_sf1000_nopkfk/shape/query60.groovy | 192 ++ .../tpcds_sf1000_nopkfk/shape/query61.groovy | 124 + .../tpcds_sf1000_nopkfk/shape/query62.groovy | 106 + .../tpcds_sf1000_nopkfk/shape/query63.groovy | 94 + .../tpcds_sf1000_nopkfk/shape/query64.groovy | 279 ++ .../tpcds_sf1000_nopkfk/shape/query65.groovy | 
94 + .../tpcds_sf1000_nopkfk/shape/query66.groovy | 476 ++++ .../tpcds_sf1000_nopkfk/shape/query67.groovy | 124 + .../tpcds_sf1000_nopkfk/shape/query68.groovy | 120 + .../tpcds_sf1000_nopkfk/shape/query69.groovy | 130 + .../tpcds_sf1000_nopkfk/shape/query7.groovy | 78 + .../tpcds_sf1000_nopkfk/shape/query70.groovy | 112 + .../tpcds_sf1000_nopkfk/shape/query71.groovy | 116 + .../tpcds_sf1000_nopkfk/shape/query72.groovy | 94 + .../tpcds_sf1000_nopkfk/shape/query73.groovy | 92 + .../tpcds_sf1000_nopkfk/shape/query74.groovy | 158 ++ .../tpcds_sf1000_nopkfk/shape/query75.groovy | 176 ++ .../tpcds_sf1000_nopkfk/shape/query76.groovy | 84 + .../tpcds_sf1000_nopkfk/shape/query77.groovy | 252 ++ .../tpcds_sf1000_nopkfk/shape/query78.groovy | 152 + .../tpcds_sf1000_nopkfk/shape/query79.groovy | 82 + .../tpcds_sf1000_nopkfk/shape/query8.groovy | 253 ++ .../tpcds_sf1000_nopkfk/shape/query80.groovy | 228 ++ .../tpcds_sf1000_nopkfk/shape/query81.groovy | 98 + .../tpcds_sf1000_nopkfk/shape/query82.groovy | 70 + .../tpcds_sf1000_nopkfk/shape/query83.groovy | 170 ++ .../tpcds_sf1000_nopkfk/shape/query84.groovy | 78 + .../tpcds_sf1000_nopkfk/shape/query85.groovy | 204 ++ .../tpcds_sf1000_nopkfk/shape/query86.groovy | 88 + .../tpcds_sf1000_nopkfk/shape/query87.groovy | 82 + .../tpcds_sf1000_nopkfk/shape/query88.groovy | 224 ++ .../tpcds_sf1000_nopkfk/shape/query89.groovy | 92 + .../tpcds_sf1000_nopkfk/shape/query9.groovy | 139 + .../tpcds_sf1000_nopkfk/shape/query90.groovy | 80 + .../tpcds_sf1000_nopkfk/shape/query91.groovy | 98 + .../tpcds_sf1000_nopkfk/shape/query92.groovy | 96 + .../tpcds_sf1000_nopkfk/shape/query93.groovy | 72 + .../tpcds_sf1000_nopkfk/shape/query94.groovy | 94 + .../tpcds_sf1000_nopkfk/shape/query95.groovy | 100 + .../tpcds_sf1000_nopkfk/shape/query96.groovy | 68 + .../tpcds_sf1000_nopkfk/shape/query97.groovy | 89 + .../tpcds_sf1000_nopkfk/shape/query98.groovy | 102 + .../tpcds_sf1000_nopkfk/shape/query99.groovy | 106 + 203 files changed, 20372 insertions(+), 192 
deletions(-) create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query1.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query10.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query11.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query12.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query13.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query15.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query16.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query17.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query18.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query19.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query20.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query23.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query24.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query25.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query26.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query27.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query28.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query29.out create 
mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query3.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query30.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query31.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query32.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query33.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query34.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query35.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query36.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query38.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query4.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query40.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query41.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query42.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query44.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query45.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query46.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query47.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query48.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query49.out create mode 100644 
regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query50.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query51.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query52.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query53.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query54.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query55.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query56.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query57.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query58.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query59.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query6.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query60.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query61.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query62.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query63.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query64.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query65.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query66.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query67.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query68.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query69.out create mode 100644 
regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query7.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query70.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query71.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query72.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query73.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query74.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query75.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query76.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query77.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query79.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query8.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query80.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query81.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query83.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query84.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query85.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query86.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query87.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query88.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query89.out create mode 100644 
regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query9.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query90.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query91.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query92.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query93.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query94.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query95.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query96.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query97.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query98.out create mode 100644 regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query99.out create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/load.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query1.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query10.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query11.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query12.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query13.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query14.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query15.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query16.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query17.groovy create mode 100644 
regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query18.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query19.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query2.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query20.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query21.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query22.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query23.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query24.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query25.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query26.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query27.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query28.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query29.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query3.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query30.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query31.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query32.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query33.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query34.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query35.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query36.groovy create mode 100644 
regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query37.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query38.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query39.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query4.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query40.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query41.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query42.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query43.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query44.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query45.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query46.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query47.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query48.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query49.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query5.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query50.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query51.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query52.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query53.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query54.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query55.groovy create mode 100644 
regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query56.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query57.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query58.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query59.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query6.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query60.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query61.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query62.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query63.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query64.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query65.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query66.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query67.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query68.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query69.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query7.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query70.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query71.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query72.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query73.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query74.groovy create mode 100644 
regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query75.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query76.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query77.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query78.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query79.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query8.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query80.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query81.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query82.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query83.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query84.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query85.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query86.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query87.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query88.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query89.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query9.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query90.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query91.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query92.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query93.groovy create mode 100644 
regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query94.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query95.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query96.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query97.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query98.groovy create mode 100644 regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query99.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java index 292caf230d0ca2..4432d4ead75132 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java @@ -38,7 +38,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Set; /** * sum(if t1.a then t2.b) diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out index c38ee45f762950..f514575b3964ce 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query23.out @@ -53,29 +53,29 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------hashAgg[LOCAL] ----------------PhysicalUnion ------------------PhysicalProject ---------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF5 item_sk->[cs_item_sk] +--------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF5 
cs_item_sk->[item_sk] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 ----------------------PhysicalProject ------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[cs_bill_customer_sk] --------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4 RF5 +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) ----------------------------------PhysicalOlapScan[date_dim] --------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalProject ---------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF8 item_sk->[ws_item_sk] +--------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF8 ws_item_sk->[item_sk] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF8 ----------------------PhysicalProject ------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF7 c_customer_sk->[ws_bill_customer_sk] --------------------------PhysicalProject ----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF6 d_date_sk->[ws_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 RF8 +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 ------------------------------PhysicalProject --------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) ----------------------------------PhysicalOlapScan[date_dim] --------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) -----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query64.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query64.out index 0762e95d146c6d..7d5490b3d05c55 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query64.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query64.out @@ -7,85 +7,86 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF19 d_date_sk->[c_first_shipto_date_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF19 i_item_sk->[cr_item_sk,cs_item_sk,sr_item_sk,ss_item_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF18 ss_customer_sk->[c_customer_sk] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF18 ib_income_band_sk->[hd_income_band_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN shuffle] 
hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF17 ca_address_sk->[c_current_addr_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF17 ib_income_band_sk->[hd_income_band_sk] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF16 cd_demo_sk->[c_current_cdemo_sk] +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF16 ca_address_sk->[c_current_addr_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF15 d_date_sk->[c_first_sales_date_sk] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF15 ca_address_sk->[ss_addr_sk] --------------------------------PhysicalProject ----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk] ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[customer] apply RFs: RF14 RF15 RF16 RF17 RF18 RF19 -------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF13 ib_income_band_sk->[hd_income_band_sk] -----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[household_demographics(hd2)] apply RFs: RF13 
+--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[ss_hdemo_sk] ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[income_band(ib2)] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[date_dim(d2)] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[customer_demographics(cd2)] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[customer_address(ad2)] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF12 ca_address_sk->[ss_addr_sk] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF10 ss_item_sk->[sr_item_sk];RF11 ss_ticket_number->[sr_ticket_number] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store_returns] apply RFs: RF10 RF11 -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF9 p_promo_sk->[ss_promo_sk] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF8 cd_demo_sk->[ss_cdemo_sk] -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 
i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF5 ib_income_band_sk->[hd_income_band_sk] -----------------------------------------------PhysicalProject -------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF4 hd_demo_sk->[ss_hdemo_sk] ---------------------------------------------------PhysicalProject -----------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF3 cs_item_sk->[ss_item_sk] -------------------------------------------------------PhysicalProject ---------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] -----------------------------------------------------------PhysicalProject -------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF6 RF7 RF8 RF9 RF12 -----------------------------------------------------------PhysicalProject -------------------------------------------------------------filter(d_year IN (1999, 2000)) ---------------------------------------------------------------PhysicalOlapScan[date_dim(d1)] -------------------------------------------------------PhysicalProject ---------------------------------------------------------filter((sale > (2 * refund))) 
-----------------------------------------------------------hashAgg[GLOBAL] -------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------------------------------hashAgg[LOCAL] +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF12 p_promo_sk->[ss_promo_sk] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF11 cd_demo_sk->[c_current_cdemo_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 cd_demo_sk->[ss_cdemo_sk] +----------------------------------------------------PhysicalProject +------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[c_first_shipto_date_sk] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF8 d_date_sk->[c_first_sales_date_sk] +------------------------------------------------------------PhysicalProject +--------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk] ----------------------------------------------------------------PhysicalProject 
-------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number] +------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk] --------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF7 +----------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] +------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF4 cs_item_sk->[sr_item_sk,ss_item_sk] +----------------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF2 sr_item_sk->[ss_item_sk];RF3 sr_ticket_number->[ss_ticket_number] +--------------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF5 RF6 RF7 
RF10 RF12 RF13 RF15 RF19 +--------------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF19 +----------------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------------filter((sale > (2 * refund))) +--------------------------------------------------------------------------------hashAgg[GLOBAL] +----------------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number] +------------------------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF19 +------------------------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF19 +------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------filter(d_year IN (1999, 2000)) 
+----------------------------------------------------------------------------PhysicalOlapScan[date_dim(d1)] --------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF7 ---------------------------------------------------PhysicalProject -----------------------------------------------------PhysicalOlapScan[household_demographics(hd1)] apply RFs: RF5 -----------------------------------------------PhysicalProject -------------------------------------------------PhysicalOlapScan[income_band(ib1)] -------------------------------------------PhysicalProject ---------------------------------------------PhysicalOlapScan[store] ---------------------------------------PhysicalProject -----------------------------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) -------------------------------------------PhysicalOlapScan[item] +----------------------------------------------------------------------PhysicalOlapScan[store] +----------------------------------------------------------------PhysicalProject +------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF8 RF9 RF11 RF14 RF16 +------------------------------------------------------------PhysicalProject +--------------------------------------------------------------PhysicalOlapScan[date_dim(d2)] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------PhysicalOlapScan[date_dim(d3)] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[customer_demographics(cd1)] +------------------------------------------------PhysicalProject 
+--------------------------------------------------PhysicalOlapScan[customer_demographics(cd2)] +--------------------------------------------PhysicalProject +----------------------------------------------PhysicalOlapScan[promotion] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[household_demographics(hd1)] apply RFs: RF17 ------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[customer_demographics(cd1)] +--------------------------------------PhysicalOlapScan[household_demographics(hd2)] apply RFs: RF18 --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[promotion] +----------------------------------PhysicalOlapScan[customer_address(ad1)] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer_address(ad2)] ------------------------PhysicalProject ---------------------------PhysicalOlapScan[customer_address(ad1)] +--------------------------PhysicalOlapScan[income_band(ib1)] +--------------------PhysicalProject +----------------------PhysicalOlapScan[income_band(ib2)] ----------------PhysicalProject -------------------PhysicalOlapScan[date_dim(d3)] +------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) +--------------------PhysicalOlapScan[item] --PhysicalResultSink ----PhysicalQuickSort[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query1.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query1.out new file mode 100644 index 00000000000000..469e6bf7aa10ff --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query1.out @@ -0,0 +1,37 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_1 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[sr_returned_date_sk] +----------------PhysicalProject +------------------PhysicalOlapScan[store_returns] apply RFs: RF0 +----------------PhysicalProject +------------------filter((date_dim.d_year = 2000)) +--------------------PhysicalOlapScan[date_dim] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ctr_customer_sk->[c_customer_sk] +--------------PhysicalProject +----------------PhysicalOlapScan[customer] apply RFs: RF3 +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF2 ctr_store_sk->[ctr_store_sk,s_store_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ctr_store_sk] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF1 RF2 +----------------------PhysicalProject +------------------------filter((store.s_state = 'TN')) +--------------------------PhysicalOlapScan[store] apply RFs: RF2 +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] 
+------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query10.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query10.out new file mode 100644 index 00000000000000..d740d8a47904bc --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query10.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_10 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)]) +--------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_moy <= 6) and (date_dim.d_moy >= 3) and (date_dim.d_year = 2001)) +------------------------------PhysicalOlapScan[date_dim] +----------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 
d_date_sk->[ws_sold_date_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 +----------------------------PhysicalProject +------------------------------filter((date_dim.d_moy <= 6) and (date_dim.d_moy >= 3) and (date_dim.d_year = 2001)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ss_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy <= 6) and (date_dim.d_moy >= 3) and (date_dim.d_year = 2001)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) otherCondition=() build RFs:RF1 c_current_cdemo_sk->[cd_demo_sk] +------------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF1 +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[c_current_addr_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[customer(c)] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter(ca_county IN ('Campbell County', 'Cleburne County', 'Escambia 
County', 'Fairfield County', 'Washtenaw County')) +--------------------------------------PhysicalOlapScan[customer_address(ca)] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query11.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query11.out new file mode 100644 index 00000000000000..455d0c83c64d55 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query11.out @@ -0,0 +1,63 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_11 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalUnion +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF0 c_customer_sk->[ss_customer_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------------PhysicalProject +--------------------filter(d_year IN (1998, 1999)) +----------------------PhysicalOlapScan[date_dim] +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) 
otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ws_bill_customer_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------------PhysicalProject +--------------------filter(d_year IN (1998, 1999)) +----------------------PhysicalOlapScan[date_dim] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000) > if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000))) build RFs:RF6 customer_id->[customer_id,customer_id,customer_id] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=() build RFs:RF5 customer_id->[customer_id,customer_id] +------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() build RFs:RF4 customer_id->[customer_id] +--------------------PhysicalProject +----------------------filter((t_s_secyear.dyear = 1999) and (t_s_secyear.sale_type = 's')) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 RF5 RF6 +--------------------PhysicalProject 
+----------------------filter((t_s_firstyear.dyear = 1998) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00)) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 RF6 +------------------PhysicalProject +--------------------filter((t_w_firstyear.dyear = 1998) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 +--------------PhysicalProject +----------------filter((t_w_secyear.dyear = 1999) and (t_w_secyear.sale_type = 'w')) +------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query12.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query12.out new file mode 100644 index 00000000000000..f46e97e8a5b3c6 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query12.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_12 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ws_item_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------PhysicalProject 
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter(i_category IN ('Books', 'Electronics', 'Men')) +------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query13.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query13.out new file mode 100644 index 00000000000000..027cb5f01acff1 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query13.out @@ -0,0 +1,34 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_13 -- +PhysicalResultSink +--hashAgg[GLOBAL] +----PhysicalDistribute[DistributionSpecGather] +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF4 s_store_sk->[ss_store_sk] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=(OR[AND[ca_state IN ('IL', 'TN', 'TX'),(store_sales.ss_net_profit >= 100.00),(store_sales.ss_net_profit <= 200.00)],AND[ca_state IN ('ID', 'OH', 'WY'),(store_sales.ss_net_profit >= 150.00)],AND[ca_state IN ('IA', 'MS', 'SC'),(store_sales.ss_net_profit <= 250.00)]]) build RFs:RF3 ss_addr_sk->[ca_address_sk] +----------------PhysicalProject +------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('IA', 'ID', 'IL', 'MS', 'OH', 'SC', 'TN', 'TX', 'WY')) +--------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------PhysicalProject 
+------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=(OR[AND[(household_demographics.hd_dep_count = 1),OR[AND[(customer_demographics.cd_marital_status = 'D'),(customer_demographics.cd_education_status = 'Primary'),(store_sales.ss_sales_price <= 100.00)],AND[(customer_demographics.cd_marital_status = 'W'),(customer_demographics.cd_education_status = '2 yr Degree'),(store_sales.ss_sales_price >= 150.00)]]],AND[(customer_demographics.cd_marital_status = 'M'),(customer_demographics.cd_education_status = 'College'),(store_sales.ss_sales_price >= 100.00),(store_sales.ss_sales_price <= 150.00),(household_demographics.hd_dep_count = 3)]]) build RFs:RF1 hd_demo_sk->[ss_hdemo_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[ss_cdemo_sk] +----------------------------PhysicalProject +------------------------------filter((store_sales.ss_net_profit <= 300.00) and (store_sales.ss_net_profit >= 50.00) and (store_sales.ss_sales_price <= 200.00) and (store_sales.ss_sales_price >= 50.00)) +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF4 +----------------------------PhysicalProject +------------------------------filter(OR[AND[(customer_demographics.cd_marital_status = 'D'),(customer_demographics.cd_education_status = 'Primary')],AND[(customer_demographics.cd_marital_status = 'W'),(customer_demographics.cd_education_status = '2 yr Degree')],AND[(customer_demographics.cd_marital_status = 'M'),(customer_demographics.cd_education_status = 'College')]] and 
cd_education_status IN ('2 yr Degree', 'College', 'Primary') and cd_marital_status IN ('D', 'M', 'W')) +--------------------------------PhysicalOlapScan[customer_demographics] +------------------------PhysicalProject +--------------------------filter(hd_dep_count IN (1, 3)) +----------------------------PhysicalOlapScan[household_demographics] +--------------------PhysicalProject +----------------------filter((date_dim.d_year = 2001)) +------------------------PhysicalOlapScan[date_dim] +------------PhysicalProject +--------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out new file mode 100644 index 00000000000000..6793b0f43678d8 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out @@ -0,0 +1,148 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_14 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_brand_id = t.brand_id) and (item.i_category_id = t.category_id) and (item.i_class_id = t.class_id)) otherCondition=() build RFs:RF6 brand_id->[i_brand_id];RF7 class_id->[i_class_id];RF8 category_id->[i_category_id] +--------PhysicalProject +----------PhysicalOlapScan[item] apply RFs: RF6 RF7 RF8 +--------PhysicalIntersect RFV2: RF19[brand_id->i_brand_id] RF20[brand_id->i_brand_id] +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = iss.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = 
d1.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------PhysicalProject +--------------------------filter((d1.d_year <= 2001) and (d1.d_year >= 1999)) +----------------------------PhysicalOlapScan[date_dim(d1)] +--------------------PhysicalProject +----------------------PhysicalOlapScan[item(iss)] +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = ics.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[cs_item_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +------------------------PhysicalProject +--------------------------filter((d2.d_year <= 2001) and (d2.d_year >= 1999)) +----------------------------PhysicalOlapScan[date_dim(d2)] +--------------------PhysicalProject +----------------------PhysicalOlapScan[item(ics)] RFV2: RF19 +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = iws.i_item_sk)) otherCondition=() build RFs:RF5 i_item_sk->[ws_item_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[web_sales] 
apply RFs: RF4 RF5 +------------------------PhysicalProject +--------------------------filter((d3.d_year <= 2001) and (d3.d_year >= 1999)) +----------------------------PhysicalOlapScan[date_dim(d3)] +--------------------PhysicalProject +----------------------PhysicalOlapScan[item(iws)] RFV2: RF20 +--PhysicalCteAnchor ( cteId=CTEId#1 ) +----PhysicalCteProducer ( cteId=CTEId#1 ) +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[cs_sold_date_sk,ss_sold_date_sk,ws_sold_date_sk] +----------------PhysicalUnion +------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store_sales] apply RFs: RF9 +------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------PhysicalProject +----------------------PhysicalOlapScan[catalog_sales] apply RFs: RF9 +------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------PhysicalProject +----------------------PhysicalOlapScan[web_sales] apply RFs: RF9 +----------------PhysicalProject +------------------filter((date_dim.d_year <= 2001) and (date_dim.d_year >= 1999)) +--------------------PhysicalOlapScan[date_dim] +----PhysicalResultSink +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalRepeat +----------------------PhysicalUnion +------------------------PhysicalProject +--------------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > avg_sales.average_sales) +----------------------------PhysicalProject 
+------------------------------hashAgg[GLOBAL] +--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF12 i_item_sk->[ss_item_sk,ss_item_sk] +----------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk)) otherCondition=() build RFs:RF11 ss_item_sk->[ss_item_sk] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk] +----------------------------------------------PhysicalProject +------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF10 RF11 RF12 +----------------------------------------------PhysicalProject +------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 2001)) +--------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF12 +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalAssertNumRows +------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------PhysicalCteConsumer ( cteId=CTEId#1 ) +------------------------PhysicalProject +--------------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > avg_sales.average_sales) +----------------------------PhysicalProject +------------------------------hashAgg[GLOBAL] 
+--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF15 i_item_sk->[cs_item_sk,ss_item_sk] +----------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk)) otherCondition=() build RFs:RF14 ss_item_sk->[cs_item_sk] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk] +----------------------------------------------PhysicalProject +------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF13 RF14 RF15 +----------------------------------------------PhysicalProject +------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 2001)) +--------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF15 +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalAssertNumRows +------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------PhysicalCteConsumer ( cteId=CTEId#1 ) +------------------------PhysicalProject +--------------------------NestedLoopJoin[INNER_JOIN](cast(sales as DECIMALV3(38, 4)) > avg_sales.average_sales) +----------------------------PhysicalProject +------------------------------hashAgg[GLOBAL] +--------------------------------PhysicalDistribute[DistributionSpecHash] 
+----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF18 i_item_sk->[ss_item_sk,ws_item_sk] +----------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk)) otherCondition=() build RFs:RF17 ss_item_sk->[ws_item_sk] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk] +----------------------------------------------PhysicalProject +------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF16 RF17 RF18 +----------------------------------------------PhysicalProject +------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 2001)) +--------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF18 +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalAssertNumRows +------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------PhysicalCteConsumer ( cteId=CTEId#1 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query15.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query15.out new file mode 100644 index 00000000000000..06c1b08293ef85 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query15.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_15 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(OR[substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274'),ca_state IN ('CA', 'GA', 'WA'),(catalog_sales.cs_sales_price > 500.00)]) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF1 RF2 +----------------------PhysicalProject +------------------------filter((date_dim.d_qoy = 2) and (date_dim.d_year = 2001)) +--------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[c_current_addr_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] apply RFs: RF0 +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer_address] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query16.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query16.out new file mode 100644 index 00000000000000..30da2ea81f8a2a --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query16.out @@ -0,0 +1,35 @@ +-- This file is 
automatically generated. You should know what you did if you want to edit this +-- !ds_shape_16 -- +PhysicalResultSink +--PhysicalLimit[GLOBAL] +----PhysicalLimit[LOCAL] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number] +--------------------PhysicalProject +----------------------PhysicalOlapScan[catalog_sales(cs2)] apply RFs: RF4 +--------------------hashJoin[RIGHT_ANTI_JOIN shuffle] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF3 cs_order_number->[cr_order_number] +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_returns(cr1)] apply RFs: RF3 +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[catalog_sales(cs1)] apply RFs: RF0 RF1 RF2 +----------------------------------PhysicalProject +------------------------------------filter((customer_address.ca_state 
= 'PA')) +--------------------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((call_center.cc_county = 'Williamson County')) +------------------------------PhysicalOlapScan[call_center] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query17.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query17.out new file mode 100644 index 00000000000000..82a1cff667d402 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query17.out @@ -0,0 +1,44 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_17 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk) and (store_returns.sr_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF8 sr_customer_sk->[cs_bill_customer_sk];RF9 sr_item_sk->[cs_item_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[cs_sold_date_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF7 RF8 RF9 +------------------------PhysicalProject +--------------------------filter(d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3')) 
+----------------------------PhysicalOlapScan[date_dim(d3)] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[sr_item_sk,ss_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = store_returns.sr_customer_sk) and (store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF2 sr_customer_sk->[ss_customer_sk];RF3 sr_item_sk->[ss_item_sk];RF4 sr_ticket_number->[ss_ticket_number] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d1.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 RF5 RF6 +------------------------------------PhysicalProject +--------------------------------------filter((d1.d_quarter_name = '2001Q1')) +----------------------------------------PhysicalOlapScan[date_dim(d1)] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_returned_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[sr_returned_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF0 RF6 +------------------------------------PhysicalProject 
+--------------------------------------filter(d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3')) +----------------------------------------PhysicalOlapScan[date_dim(d2)] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query18.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query18.out new file mode 100644 index 00000000000000..0701882afd123e --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query18.out @@ -0,0 +1,42 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_18 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalRepeat +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF5 i_item_sk->[cs_item_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF2 
cd_demo_sk->[cs_bill_cdemo_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 RF4 RF5 +----------------------------------PhysicalProject +------------------------------------filter((cd1.cd_education_status = 'Primary') and (cd1.cd_gender = 'F')) +--------------------------------------PhysicalOlapScan[customer_demographics(cd1)] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF1 c_current_cdemo_sk->[cd_demo_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[customer_demographics(cd2)] apply RFs: RF1 +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[c_current_addr_sk] +--------------------------------------PhysicalProject +----------------------------------------filter(c_birth_month IN (1, 10, 11, 3, 4, 7)) +------------------------------------------PhysicalOlapScan[customer] apply RFs: RF0 +--------------------------------------PhysicalProject +----------------------------------------filter(ca_state IN ('AL', 'CA', 'GA', 'IN', 'MO', 'MT', 'TN')) +------------------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_year = 2001)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query19.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query19.out new file mode 100644 index 
00000000000000..addae12e92893c --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query19.out @@ -0,0 +1,35 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_19 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=(( not (substring(ca_zip, 1, 5) = substring(s_zip, 1, 5)))) build RFs:RF4 c_current_addr_sk->[ca_address_sk] +--------------------PhysicalProject +----------------------PhysicalOlapScan[customer_address] apply RFs: RF4 +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[customer] apply RFs: RF3 +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject 
+--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter((item.i_manager_id = 14)) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out new file mode 100644 index 00000000000000..2522a1a9f3f342 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out @@ -0,0 +1,45 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_2 -- +PhysicalCteAnchor ( cteId=CTEId#1 ) +--PhysicalCteProducer ( cteId=CTEId#1 ) +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=() build RFs:RF0 sold_date_sk->[d_date_sk] +--------------PhysicalProject +----------------PhysicalOlapScan[date_dim] apply RFs: RF0 +--------------PhysicalUnion +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[web_sales] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject 
+--------------------------PhysicalOlapScan[catalog_sales] +--PhysicalResultSink +----PhysicalQuickSort[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalQuickSort[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((expr_cast(d_week_seq1 as BIGINT) = expr_(cast(d_week_seq2 as BIGINT) - 53))) otherCondition=() build RFs:RF3 expr_(cast(d_week_seq2 as BIGINT) - 53)->[cast(d_week_seq as BIGINT)] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN shuffle] hashCondition=((date_dim.d_week_seq = d_week_seq1)) otherCondition=() build RFs:RF2 d_week_seq->[d_week_seq] +------------------PhysicalProject +--------------------PhysicalCteConsumer ( cteId=CTEId#1 ) apply RFs: RF2 RF3 +------------------PhysicalProject +--------------------filter((date_dim.d_year = 1998)) +----------------------PhysicalOlapScan[date_dim] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN shuffle] hashCondition=((date_dim.d_week_seq = d_week_seq2)) otherCondition=() build RFs:RF1 d_week_seq->[d_week_seq] +------------------PhysicalProject +--------------------PhysicalCteConsumer ( cteId=CTEId#1 ) apply RFs: RF1 +------------------PhysicalProject +--------------------filter((date_dim.d_year = 1999)) +----------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query20.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query20.out new file mode 100644 index 00000000000000..8728415de8b335 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query20.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_20 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[cs_item_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '2002-07-18') and (date_dim.d_date >= '2002-06-18')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter(i_category IN ('Books', 'Music', 'Sports')) +------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out new file mode 100644 index 00000000000000..7aaa027dd961ca --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_21 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------filter(((cast(inv_after as DOUBLE) / cast(inv_before as DOUBLE)) <= 1.5) and (if((inv_before > 0), (cast(inv_after as DOUBLE) / cast(inv_before as DOUBLE)), NULL) >= cast((2.000000 / 3.0) as DOUBLE)) and (x.inv_before > 0)) +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF2 w_warehouse_sk->[inv_warehouse_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = inventory.inv_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk] +----------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 RF2 +----------------------------PhysicalProject +------------------------------filter((item.i_current_price <= 1.49) and (item.i_current_price >= 0.99)) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter((date_dim.d_date <= '1999-07-22') and (date_dim.d_date >= '1999-05-23')) +----------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalProject +----------------------PhysicalOlapScan[warehouse] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out new file mode 100644 index 00000000000000..09dedb98772f96 --- /dev/null 
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out @@ -0,0 +1,23 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_22 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalRepeat +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[inv_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1211) and (date_dim.d_month_seq >= 1200)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query23.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query23.out new file mode 100644 index 00000000000000..425b24b9863ee5 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query23.out @@ -0,0 +1,89 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_23 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------filter((cnt > 4)) +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------PhysicalProject +------------------------filter(d_year IN (2000, 2001, 2002, 2003)) +--------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------PhysicalOlapScan[item] +--PhysicalCteAnchor ( cteId=CTEId#2 ) +----PhysicalCteProducer ( cteId=CTEId#2 ) +------PhysicalProject +--------NestedLoopJoin[INNER_JOIN](cast(ssales as DECIMALV3(38, 6)) > (0.9500 * tpcds_cmax)) +----------PhysicalProject +------------hashAgg[GLOBAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 +------------------PhysicalProject +--------------------PhysicalOlapScan[customer] +----------PhysicalProject +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecGather] 
+----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashAgg[GLOBAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ss_customer_sk] +------------------------------hashAgg[GLOBAL] +--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] +--------------------------PhysicalProject +----------------------------filter(d_year IN (2000, 2001, 2002, 2003)) +------------------------------PhysicalOlapScan[date_dim] +----PhysicalResultSink +------PhysicalLimit[GLOBAL] +--------PhysicalLimit[LOCAL] +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecGather] +--------------hashAgg[LOCAL] +----------------PhysicalUnion +------------------PhysicalProject +--------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF7 cs_item_sk->[item_sk] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF7 +----------------------PhysicalProject +------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF6 c_customer_sk->[cs_bill_customer_sk] +--------------------------PhysicalProject 
+----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 RF6 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------PhysicalProject +--------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF10 ws_item_sk->[item_sk] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF10 +----------------------PhysicalProject +------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() build RFs:RF9 c_customer_sk->[ws_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF8 d_date_sk->[ws_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF8 RF9 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query24.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query24.out new file mode 100644 index 00000000000000..ec4d93d573f2a3 --- /dev/null +++ 
b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query24.out @@ -0,0 +1,52 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_24 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_zip = customer_address.ca_zip) and (store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ca_zip->[s_zip];RF3 c_customer_sk->[ss_customer_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF3 RF4 RF5 RF6 +----------------------------PhysicalProject +------------------------------filter((store.s_market_id = 5)) +--------------------------------PhysicalOlapScan[store] apply RFs: RF2 +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=(( not (c_birth_country = upper(ca_country)))) build RFs:RF0 
ca_address_sk->[c_current_addr_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer] apply RFs: RF0 +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer_address] +--------------------PhysicalProject +----------------------PhysicalOlapScan[item] apply RFs: RF6 +----------------PhysicalProject +------------------PhysicalOlapScan[store_returns] +--PhysicalResultSink +----PhysicalQuickSort[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalQuickSort[LOCAL_SORT] +----------PhysicalProject +------------NestedLoopJoin[INNER_JOIN](cast(paid as DECIMALV3(38, 6)) > 0.05*avg(netpaid)) +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------------PhysicalProject +--------------------------filter((ssales.i_color = 'aquamarine')) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecGather] +--------------------hashAgg[LOCAL] +----------------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query25.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query25.out new file mode 100644 index 00000000000000..ee55420cf0125d --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query25.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_25 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk) and (store_returns.sr_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF8 sr_customer_sk->[cs_bill_customer_sk];RF9 sr_item_sk->[cs_item_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[cs_sold_date_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF7 RF8 RF9 +----------------------PhysicalProject +------------------------filter((d3.d_moy <= 10) and (d3.d_moy >= 4) and (d3.d_year = 1999)) +--------------------------PhysicalOlapScan[date_dim(d3)] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[sr_item_sk,ss_item_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = store_returns.sr_customer_sk) and (store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF2 sr_customer_sk->[ss_customer_sk];RF3 sr_item_sk->[ss_item_sk];RF4 
sr_ticket_number->[ss_ticket_number] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d1.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 RF5 RF6 +----------------------------------PhysicalProject +------------------------------------filter((d1.d_moy = 4) and (d1.d_year = 1999)) +--------------------------------------PhysicalOlapScan[date_dim(d1)] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_returned_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[sr_returned_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF0 RF6 +----------------------------------PhysicalProject +------------------------------------filter((d2.d_moy <= 10) and (d2.d_moy >= 4) and (d2.d_year = 1999)) +--------------------------------------PhysicalOlapScan[date_dim(d2)] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store] +----------------------PhysicalProject +------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query26.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query26.out new file mode 100644 index 00000000000000..383242890f9dd4 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query26.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_26 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[cs_item_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF2 p_promo_sk->[cs_promo_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[cs_bill_cdemo_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------------PhysicalProject +--------------------------------filter((customer_demographics.cd_education_status = 'Unknown') and (customer_demographics.cd_gender = 'M') and (customer_demographics.cd_marital_status = 'W')) +----------------------------------PhysicalOlapScan[customer_demographics] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_year = 2002)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------filter(OR[(promotion.p_channel_email = 'N'),(promotion.p_channel_event = 'N')]) 
+--------------------------PhysicalOlapScan[promotion] +------------------PhysicalProject +--------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query27.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query27.out new file mode 100644 index 00000000000000..47ceeb712c2a8c --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query27.out @@ -0,0 +1,33 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_27 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalRepeat +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[ss_cdemo_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 +----------------------------------PhysicalProject 
+------------------------------------filter((customer_demographics.cd_education_status = 'Secondary') and (customer_demographics.cd_gender = 'M') and (customer_demographics.cd_marital_status = 'W')) +--------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------PhysicalProject +--------------------------------filter((store.s_state = 'TN')) +----------------------------------PhysicalOlapScan[store] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_year = 1999)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query28.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query28.out new file mode 100644 index 00000000000000..c1d2341a2a776b --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query28.out @@ -0,0 +1,75 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_28 -- +PhysicalResultSink +--PhysicalLimit[GLOBAL] +----PhysicalLimit[LOCAL] +------NestedLoopJoin[CROSS_JOIN] +--------PhysicalLimit[LOCAL] +----------NestedLoopJoin[CROSS_JOIN] +------------PhysicalLimit[LOCAL] +--------------NestedLoopJoin[CROSS_JOIN] +----------------PhysicalLimit[LOCAL] +------------------NestedLoopJoin[CROSS_JOIN] +--------------------PhysicalLimit[LOCAL] +----------------------NestedLoopJoin[CROSS_JOIN] +------------------------PhysicalLimit[LOCAL] +--------------------------hashAgg[DISTINCT_GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------hashAgg[DISTINCT_LOCAL] +--------------------------------hashAgg[GLOBAL] +----------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------hashAgg[LOCAL] +--------------------------------------PhysicalProject +----------------------------------------filter((store_sales.ss_quantity <= 5) and (store_sales.ss_quantity >= 0) and OR[AND[(store_sales.ss_list_price >= 107.00),(store_sales.ss_list_price <= 117.00)],AND[(store_sales.ss_coupon_amt >= 1319.00),(store_sales.ss_coupon_amt <= 2319.00)],AND[(store_sales.ss_wholesale_cost >= 60.00),(store_sales.ss_wholesale_cost <= 80.00)]]) +------------------------------------------PhysicalOlapScan[store_sales] +------------------------PhysicalLimit[LOCAL] +--------------------------hashAgg[DISTINCT_GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------hashAgg[DISTINCT_LOCAL] +--------------------------------hashAgg[GLOBAL] +----------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------hashAgg[LOCAL] +--------------------------------------PhysicalProject +----------------------------------------filter((store_sales.ss_quantity <= 10) and (store_sales.ss_quantity >= 6) and 
OR[AND[(store_sales.ss_list_price >= 23.00),(store_sales.ss_list_price <= 33.00)],AND[(store_sales.ss_coupon_amt >= 825.00),(store_sales.ss_coupon_amt <= 1825.00)],AND[(store_sales.ss_wholesale_cost >= 43.00),(store_sales.ss_wholesale_cost <= 63.00)]]) +------------------------------------------PhysicalOlapScan[store_sales] +--------------------PhysicalLimit[LOCAL] +----------------------hashAgg[DISTINCT_GLOBAL] +------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------hashAgg[DISTINCT_LOCAL] +----------------------------hashAgg[GLOBAL] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------hashAgg[LOCAL] +----------------------------------PhysicalProject +------------------------------------filter((store_sales.ss_quantity <= 15) and (store_sales.ss_quantity >= 11) and OR[AND[(store_sales.ss_list_price >= 74.00),(store_sales.ss_list_price <= 84.00)],AND[(store_sales.ss_coupon_amt >= 4381.00),(store_sales.ss_coupon_amt <= 5381.00)],AND[(store_sales.ss_wholesale_cost >= 57.00),(store_sales.ss_wholesale_cost <= 77.00)]]) +--------------------------------------PhysicalOlapScan[store_sales] +----------------PhysicalLimit[LOCAL] +------------------hashAgg[DISTINCT_GLOBAL] +--------------------PhysicalDistribute[DistributionSpecGather] +----------------------hashAgg[DISTINCT_LOCAL] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------filter((store_sales.ss_quantity <= 20) and (store_sales.ss_quantity >= 16) and OR[AND[(store_sales.ss_list_price >= 89.00),(store_sales.ss_list_price <= 99.00)],AND[(store_sales.ss_coupon_amt >= 3117.00),(store_sales.ss_coupon_amt <= 4117.00)],AND[(store_sales.ss_wholesale_cost >= 68.00),(store_sales.ss_wholesale_cost <= 88.00)]]) 
+----------------------------------PhysicalOlapScan[store_sales] +------------PhysicalLimit[LOCAL] +--------------hashAgg[DISTINCT_GLOBAL] +----------------PhysicalDistribute[DistributionSpecGather] +------------------hashAgg[DISTINCT_LOCAL] +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------filter((store_sales.ss_quantity <= 25) and (store_sales.ss_quantity >= 21) and OR[AND[(store_sales.ss_list_price >= 58.00),(store_sales.ss_list_price <= 68.00)],AND[(store_sales.ss_coupon_amt >= 9402.00),(store_sales.ss_coupon_amt <= 10402.00)],AND[(store_sales.ss_wholesale_cost >= 38.00),(store_sales.ss_wholesale_cost <= 58.00)]]) +------------------------------PhysicalOlapScan[store_sales] +--------PhysicalLimit[LOCAL] +----------hashAgg[DISTINCT_GLOBAL] +------------PhysicalDistribute[DistributionSpecGather] +--------------hashAgg[DISTINCT_LOCAL] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------filter((store_sales.ss_quantity <= 30) and (store_sales.ss_quantity >= 26) and OR[AND[(store_sales.ss_list_price >= 64.00),(store_sales.ss_list_price <= 74.00)],AND[(store_sales.ss_coupon_amt >= 5792.00),(store_sales.ss_coupon_amt <= 6792.00)],AND[(store_sales.ss_wholesale_cost >= 73.00),(store_sales.ss_wholesale_cost <= 93.00)]]) +--------------------------PhysicalOlapScan[store_sales] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query29.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query29.out new file mode 100644 index 00000000000000..7505cb3ea50028 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query29.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_29 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[cs_sold_date_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk) and (store_returns.sr_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF7 sr_customer_sk->[cs_bill_customer_sk];RF8 sr_item_sk->[cs_item_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF7 RF8 RF9 +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[sr_item_sk,ss_item_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = store_returns.sr_customer_sk) and (store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF2 sr_customer_sk->[ss_customer_sk];RF3 sr_item_sk->[ss_item_sk];RF4 sr_ticket_number->[ss_ticket_number] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d1.d_date_sk = 
store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 RF5 RF6 +--------------------------------------PhysicalProject +----------------------------------------filter((d1.d_moy = 4) and (d1.d_year = 1998)) +------------------------------------------PhysicalOlapScan[date_dim(d1)] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_returned_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[sr_returned_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF0 RF6 +--------------------------------------PhysicalProject +----------------------------------------filter((d2.d_moy <= 7) and (d2.d_moy >= 4) and (d2.d_year = 1998)) +------------------------------------------PhysicalOlapScan[date_dim(d2)] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[item] +------------------PhysicalProject +--------------------filter(d_year IN (1998, 1999, 2000)) +----------------------PhysicalOlapScan[date_dim(d3)] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query3.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query3.out new file mode 100644 index 00000000000000..fae84ff1a42849 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query3.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_3 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------PhysicalProject +--------------------------filter((dt.d_moy = 11)) +----------------------------PhysicalOlapScan[date_dim(dt)] +--------------------PhysicalProject +----------------------filter((item.i_manufact_id = 816)) +------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query30.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query30.out new file mode 100644 index 00000000000000..811d6cd3bf4466 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query30.out @@ -0,0 +1,41 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_30 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_returns.wr_returning_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[wr_returning_addr_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_returns.wr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[wr_returned_date_sk] +--------------------PhysicalProject +----------------------PhysicalOlapScan[web_returns] apply RFs: RF0 RF1 +--------------------PhysicalProject +----------------------filter((date_dim.d_year = 2000)) +------------------------PhysicalOlapScan[date_dim] +----------------PhysicalProject +------------------PhysicalOlapScan[customer_address] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ctr_customer_sk->[c_customer_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] apply RFs: RF2 RF3 
+----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 +------------------PhysicalProject +--------------------filter((customer_address.ca_state = 'AR')) +----------------------PhysicalOlapScan[customer_address] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query31.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query31.out new file mode 100644 index 00000000000000..ddd91f4702c00c --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query31.out @@ -0,0 +1,73 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_31 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------PhysicalProject +----------------------filter((ss.d_year = 1999) and d_qoy IN (1, 2, 3)) +------------------------PhysicalOlapScan[date_dim] 
+----------------PhysicalProject +------------------PhysicalOlapScan[customer_address] +--PhysicalCteAnchor ( cteId=CTEId#1 ) +----PhysicalCteProducer ( cteId=CTEId#1 ) +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 +----------------------PhysicalProject +------------------------filter((ws.d_year = 1999) and d_qoy IN (1, 2, 3)) +--------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------PhysicalOlapScan[customer_address] +----PhysicalResultSink +------PhysicalQuickSort[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalQuickSort[LOCAL_SORT] +------------PhysicalProject +--------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ws1.ca_county = ws3.ca_county)) otherCondition=((if((web_sales > 0.00), (cast(web_sales as DECIMALV3(38, 8)) / web_sales), NULL) > if((store_sales > 0.00), (cast(store_sales as DECIMALV3(38, 8)) / store_sales), NULL))) build RFs:RF8 ca_county->[ca_county] +----------------PhysicalProject +------------------filter((ws3.d_qoy = 3) and (ws3.d_year = 1999)) +--------------------PhysicalCteConsumer ( cteId=CTEId#1 ) apply RFs: RF8 +----------------PhysicalProject 
+------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ws1.ca_county = ws2.ca_county)) otherCondition=((if((web_sales > 0.00), (cast(web_sales as DECIMALV3(38, 8)) / web_sales), NULL) > if((store_sales > 0.00), (cast(store_sales as DECIMALV3(38, 8)) / store_sales), NULL))) build RFs:RF7 ca_county->[ca_county] +--------------------PhysicalProject +----------------------filter((ws2.d_qoy = 2) and (ws2.d_year = 1999)) +------------------------PhysicalCteConsumer ( cteId=CTEId#1 ) apply RFs: RF7 +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((ss1.ca_county = ws1.ca_county)) otherCondition=() build RFs:RF6 ca_county->[ca_county,ca_county] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((ss2.ca_county = ss3.ca_county)) otherCondition=() build RFs:RF5 ca_county->[ca_county,ca_county] +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss1.ca_county = ss2.ca_county)) otherCondition=() build RFs:RF4 ca_county->[ca_county] +----------------------------PhysicalProject +------------------------------filter((ss1.d_qoy = 1) and (ss1.d_year = 1999)) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 RF5 RF6 +----------------------------PhysicalProject +------------------------------filter((ss2.d_qoy = 2) and (ss2.d_year = 1999)) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 RF6 +--------------------------PhysicalProject +----------------------------filter((ss3.d_qoy = 3) and (ss3.d_year = 1999)) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalProject +------------------------filter((ws1.d_qoy = 1) and (ws1.d_year = 1999)) +--------------------------PhysicalCteConsumer ( cteId=CTEId#1 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query32.out 
b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query32.out new file mode 100644 index 00000000000000..cb7cc0e46364ed --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query32.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_32 -- +PhysicalResultSink +--PhysicalLimit[GLOBAL] +----PhysicalLimit[LOCAL] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) +----------------PhysicalWindow +------------------PhysicalQuickSort[LOCAL_SORT] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((item.i_manufact_id = 722)) +----------------------------------PhysicalOlapScan[item] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_date <= '2001-06-07') and (date_dim.d_date >= '2001-03-09')) +------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query33.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query33.out new file mode 100644 index 
00000000000000..1f4f083cdcf52f --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query33.out @@ -0,0 +1,83 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_33 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalProject +------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF3 i_manufact_id->[i_manufact_id] +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2001)) +------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------PhysicalProject 
+------------------------------------filter((customer_address.ca_gmt_offset = -5.00)) +--------------------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[item] apply RFs: RF3 +--------------------PhysicalProject +----------------------filter((item.i_category = 'Books')) +------------------------PhysicalOlapScan[item] +----------------PhysicalProject +------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF7 i_manufact_id->[i_manufact_id] +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[cs_item_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF5 ca_address_sk->[cs_bill_addr_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5 RF6 +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2001)) +------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------PhysicalProject +------------------------------------filter((customer_address.ca_gmt_offset = 
-5.00)) +--------------------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[item] apply RFs: RF7 +--------------------PhysicalProject +----------------------filter((item.i_category = 'Books')) +------------------------PhysicalOlapScan[item] +----------------PhysicalProject +------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF11 i_manufact_id->[i_manufact_id] +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 i_item_sk->[ws_item_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF9 ca_address_sk->[ws_bill_addr_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF8 d_date_sk->[ws_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF8 RF9 RF10 +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2001)) +------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------PhysicalProject +------------------------------------filter((customer_address.ca_gmt_offset = -5.00)) +--------------------------------------PhysicalOlapScan[customer_address] 
+------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[item] apply RFs: RF11 +--------------------PhysicalProject +----------------------filter((item.i_category = 'Books')) +------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query34.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query34.out new file mode 100644 index 00000000000000..b9f187a3aaabee --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query34.out @@ -0,0 +1,32 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_34 -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk] +------------PhysicalProject +--------------PhysicalOlapScan[customer] apply RFs: RF3 +------------filter((dn.cnt <= 20) and (dn.cnt >= 15)) +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk] 
+--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter((store.s_county = 'Williamson County')) +------------------------------------PhysicalOlapScan[store] +----------------------------PhysicalProject +------------------------------filter((date_dim.d_dom <= 28) and (date_dim.d_dom >= 1) and OR[(date_dim.d_dom <= 3),(date_dim.d_dom >= 25)] and d_year IN (2000, 2001, 2002)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------filter(((cast(hd_dep_count as DOUBLE) / cast(hd_vehicle_count as DOUBLE)) > 1.2) and (household_demographics.hd_vehicle_count > 0) and hd_buy_potential IN ('0-500', '1001-5000')) +----------------------------PhysicalOlapScan[household_demographics] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query35.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query35.out new file mode 100644 index 00000000000000..5eb3a1ba4f931b --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query35.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_35 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------filter(OR[ifnull($c$1, FALSE),ifnull($c$2, FALSE)]) +--------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) otherCondition=() build RFs:RF5 cd_demo_sk->[c_current_cdemo_sk] +--------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() +----------------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ss_customer_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[c_current_addr_sk] 
+----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[customer(c)] apply RFs: RF2 RF5 +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[customer_address(ca)] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999)) +------------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer_demographics] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999)) +------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query36.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query36.out new file mode 100644 index 00000000000000..f75ea80df28def --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query36.out @@ -0,0 +1,33 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_36 -- +PhysicalResultSink +--PhysicalProject +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalRepeat +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d1.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((store.s_state = 'TN')) +--------------------------------------------PhysicalOlapScan[store] +------------------------------------PhysicalProject +--------------------------------------filter((d1.d_year = 2000)) +----------------------------------------PhysicalOlapScan[date_dim(d1)] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[item] + diff --git 
a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out new file mode 100644 index 00000000000000..149a61f7b37054 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out @@ -0,0 +1,27 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_37 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk] +--------------------------PhysicalProject +----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100)) +------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((item.i_current_price <= 59.00) and (item.i_current_price >= 29.00) and i_manufact_id IN (705, 742, 777, 944)) +------------------------------PhysicalOlapScan[item] +----------------------PhysicalProject +------------------------filter((date_dim.d_date <= '2002-05-28') and (date_dim.d_date >= 
'2002-03-29')) +--------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query38.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query38.out new file mode 100644 index 00000000000000..541c967c35d15b --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query38.out @@ -0,0 +1,62 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_38 -- +PhysicalResultSink +--PhysicalLimit[GLOBAL] +----PhysicalLimit[LOCAL] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------PhysicalIntersect +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------hashAgg[GLOBAL] +--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] 
+----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +------------------------------hashAgg[GLOBAL] +--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] +------------------------------hashAgg[GLOBAL] +--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashAgg[LOCAL] 
+------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out new file mode 100644 index 00000000000000..b826c2ac59911a --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_39 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------filter(( not (mean = 0.0)) and ((foo.stdev / foo.mean) > 1.0)) +--------hashAgg[GLOBAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[inv_item_sk] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF1 w_warehouse_sk->[inv_warehouse_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[inv_date_sk] +----------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 RF2 +----------------------PhysicalProject +------------------------filter((date_dim.d_year = 2000) and d_moy IN (1, 2)) +--------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject 
+--------------------PhysicalOlapScan[warehouse] +--------------PhysicalProject +----------------PhysicalOlapScan[item] +--PhysicalResultSink +----PhysicalQuickSort[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalQuickSort[LOCAL_SORT] +----------hashJoin[INNER_JOIN shuffle] hashCondition=((inv1.i_item_sk = inv2.i_item_sk) and (inv1.w_warehouse_sk = inv2.w_warehouse_sk)) otherCondition=() build RFs:RF3 i_item_sk->[i_item_sk];RF4 w_warehouse_sk->[w_warehouse_sk] +------------filter((inv1.d_moy = 1)) +--------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3 RF4 +------------filter((inv2.d_moy = 2)) +--------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query4.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query4.out new file mode 100644 index 00000000000000..ba49181568defe --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query4.out @@ -0,0 +1,91 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_4 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalUnion +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF0 c_customer_sk->[ss_customer_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------------PhysicalProject +--------------------filter(d_year IN (1999, 2000)) +----------------------PhysicalOlapScan[date_dim] +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_customer_sk = catalog_sales.cs_bill_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[cs_bill_customer_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject 
+------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------------PhysicalProject +--------------------filter(d_year IN (1999, 2000)) +----------------------PhysicalOlapScan[date_dim] +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[ws_bill_customer_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------------PhysicalProject +--------------------filter(d_year IN (1999, 2000)) +----------------------PhysicalOlapScan[date_dim] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL))) build RFs:RF10 customer_id->[customer_id,customer_id,customer_id,customer_id,customer_id] +--------------PhysicalProject 
+----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=() build RFs:RF9 customer_id->[customer_id,customer_id,customer_id,customer_id] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL))) build RFs:RF8 customer_id->[customer_id,customer_id,customer_id] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=() build RFs:RF7 customer_id->[customer_id,customer_id] +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() build RFs:RF6 customer_id->[customer_id] +----------------------------PhysicalProject +------------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's')) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 RF7 RF8 RF9 RF10 +----------------------------PhysicalProject +------------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000)) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF7 RF8 RF9 RF10 +--------------------------PhysicalProject +----------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000)) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF8 RF9 RF10 +----------------------PhysicalProject +------------------------filter((t_c_secyear.dyear = 2000) and 
(t_c_secyear.sale_type = 'c')) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF9 RF10 +------------------PhysicalProject +--------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF10 +--------------PhysicalProject +----------------filter((t_w_secyear.dyear = 2000) and (t_w_secyear.sale_type = 'w')) +------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query40.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query40.out new file mode 100644 index 00000000000000..9ecf8a19e0cae1 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query40.out @@ -0,0 +1,30 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_40 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF3 cs_order_number->[cr_order_number];RF4 cs_item_sk->[cr_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3 RF4 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF2 w_warehouse_sk->[cs_warehouse_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 +------------------------------PhysicalProject +--------------------------------filter((item.i_current_price <= 1.49) and (item.i_current_price >= 0.99)) +----------------------------------PhysicalOlapScan[item] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_date <= '2001-06-01') and (date_dim.d_date >= '2001-04-02')) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[warehouse] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query41.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query41.out new file mode 100644 index 00000000000000..92045bb79faf37 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query41.out @@ -0,0 +1,23 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_41 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_manufact = i1.i_manufact)) otherCondition=() build RFs:RF0 i_manufact->[i_manufact] +------------------PhysicalProject +--------------------filter((i1.i_manufact_id <= 744) and (i1.i_manufact_id >= 704)) +----------------------PhysicalOlapScan[item(i1)] apply RFs: RF0 +------------------PhysicalProject +--------------------filter((item_cnt > 0)) +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------filter(OR[AND[(item.i_category = 'Men'),i_size IN ('N/A', 'economy', 'large', 'small'),OR[AND[i_size IN ('economy', 'small'),i_color IN ('firebrick', 'maroon', 'sienna', 'smoke'),i_units IN ('Case', 'Cup', 'Each', 'Ounce'),OR[AND[i_color IN ('maroon', 'smoke'),i_units IN ('Case', 'Ounce')],AND[i_color IN ('firebrick', 'sienna'),i_units IN ('Cup', 'Each')]]],AND[i_size IN ('N/A', 'large'),i_color IN ('papaya', 'peach', 'powder', 'sky'),i_units IN ('Bundle', 'Carton', 'Dozen', 'Lb'),OR[AND[i_color IN ('powder', 'sky'),i_units IN ('Dozen', 'Lb')],AND[i_color IN ('papaya', 'peach'),i_units IN ('Bundle', 'Carton')]]]]],AND[(item.i_category = 'Women'),i_size IN ('economy', 'extra large', 'petite', 'small'),OR[AND[i_size IN ('economy', 'small'),i_color IN ('aquamarine', 'dark', 'forest', 'lime'),i_units IN ('Pallet', 'Pound', 'Tbl', 'Ton'),OR[AND[i_color IN ('forest', 'lime'),i_units IN ('Pallet', 'Pound')],AND[i_color IN ('aquamarine', 'dark'),i_units IN ('Tbl', 'Ton')]]],AND[i_size IN ('extra large', 
'petite'),i_color IN ('frosted', 'navy', 'plum', 'slate'),i_units IN ('Box', 'Bunch', 'Dram', 'Gross'),OR[AND[i_color IN ('navy', 'slate'),i_units IN ('Bunch', 'Gross')],AND[i_color IN ('frosted', 'plum'),i_units IN ('Box', 'Dram')]]]]]] and i_category IN ('Men', 'Women')) +--------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query42.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query42.out new file mode 100644 index 00000000000000..68f93698b9a036 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query42.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_42 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------PhysicalProject +--------------------------filter((item.i_manager_id = 1)) +----------------------------PhysicalOlapScan[item] +--------------------PhysicalProject +----------------------filter((dt.d_moy = 11) and (dt.d_year = 1998)) 
+------------------------PhysicalOlapScan[date_dim(dt)] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out new file mode 100644 index 00000000000000..37ab89010ef0a9 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out @@ -0,0 +1,22 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_43 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------PhysicalProject +------------------------filter((date_dim.d_year = 2000)) +--------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------filter((store.s_gmt_offset = -5.00)) +----------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query44.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query44.out new file mode 100644 index 00000000000000..b733c203a720df --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query44.out @@ -0,0 +1,71 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_44 -- +PhysicalResultSink +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(asceding.rnk) lazySlots:(best_performing,worst_performing)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i2.i_product_name)] apply RFs: RF1 +--------------------PhysicalProject +----------------------filter((V21.rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN +----------------------------------PhysicalProject +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DECIMALV3(38, 5)) > (0.9 * rank_col)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales(ss1)] +--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows 
+------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(i1.i_product_name)] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((V11.rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN +----------------------------------PhysicalProject +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DECIMALV3(38, 5)) > (0.9 * rank_col)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales(ss1)] 
+--------------------------------------PhysicalProject +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------hashAgg[GLOBAL] +------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +--------------------------------------------------------PhysicalOlapScan[store_sales] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query45.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query45.out new file mode 100644 index 00000000000000..dcc2e202e23752 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query45.out @@ -0,0 +1,35 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_45 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------filter(OR[substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274'),$c$1]) +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ws_item_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ws_bill_customer_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 RF2 RF3 +----------------------------PhysicalProject +------------------------------filter((date_dim.d_qoy = 1) and (date_dim.d_year = 2000)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[c_current_addr_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer] apply RFs: RF0 +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer_address] +--------------------PhysicalProject +----------------------hashJoin[LEFT_SEMI_JOIN broadcast] 
hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter(i_item_sk IN (11, 13, 17, 19, 2, 23, 29, 3, 5, 7)) +----------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query46.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query46.out new file mode 100644 index 00000000000000..b54a98e5dcb88b --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query46.out @@ -0,0 +1,38 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_46 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 ca_address_sk->[c_current_addr_sk] +------------PhysicalProject +--------------hashJoin[INNER_JOIN shuffle] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +----------------PhysicalProject +------------------PhysicalOlapScan[customer] apply RFs: RF4 RF5 +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ss_addr_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] 
+----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------------------PhysicalProject +--------------------------------------filter(s_city IN ('Fairview', 'Midway')) +----------------------------------------PhysicalOlapScan[store] +--------------------------------PhysicalProject +----------------------------------filter(d_dow IN (0, 6) and d_year IN (2000, 2001, 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------filter(OR[(household_demographics.hd_dep_count = 8),(household_demographics.hd_vehicle_count = 0)]) +--------------------------------PhysicalOlapScan[household_demographics] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[customer_address] +------------PhysicalProject +--------------PhysicalOlapScan[customer_address(current_addr)] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query47.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query47.out new file mode 100644 index 00000000000000..096bc2502ec644 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query47.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_47 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------PhysicalWindow +--------PhysicalQuickSort[LOCAL_SORT] +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2000),AND[(date_dim.d_year = 1999),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2001),(date_dim.d_moy = 1)]] and d_year IN (1999, 2000, 2001)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] +--PhysicalResultSink +----PhysicalProject +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------hashJoin[INNER_JOIN bucketShuffle] 
hashCondition=((v1.i_brand = v1_lead.i_brand) and (v1.i_category = v1_lead.i_category) and (v1.rn = expr_(rn - 1)) and (v1.s_company_name = v1_lead.s_company_name) and (v1.s_store_name = v1_lead.s_store_name)) otherCondition=() build RFs:RF8 i_category->[i_category,i_category];RF9 i_brand->[i_brand,i_brand];RF10 s_store_name->[s_store_name,s_store_name];RF11 s_company_name->[s_company_name,s_company_name];RF12 expr_(rn - 1)->[(rn + 1),rn] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN shuffle] hashCondition=((v1.i_brand = v1_lag.i_brand) and (v1.i_category = v1_lag.i_category) and (v1.rn = expr_(rn + 1)) and (v1.s_company_name = v1_lag.s_company_name) and (v1.s_store_name = v1_lag.s_store_name)) otherCondition=() build RFs:RF3 i_category->[i_category];RF4 i_brand->[i_brand];RF5 s_store_name->[s_store_name];RF6 s_company_name->[s_company_name];RF7 rn->[(rn + 1)] +--------------------PhysicalProject +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3 RF4 RF5 RF6 RF7 RF8 RF9 RF10 RF11 RF12 +--------------------filter(((cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / v2.avg_monthly_sales) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2000)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF8 RF9 RF10 RF11 RF12 +----------------PhysicalProject +------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query48.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query48.out new file mode 100644 index 00000000000000..d11dadeae0b923 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query48.out @@ -0,0 +1,29 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_48 -- +PhysicalResultSink +--hashAgg[GLOBAL] +----PhysicalDistribute[DistributionSpecGather] +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=(OR[AND[ca_state IN ('ND', 'NY', 'SD'),(store_sales.ss_net_profit <= 2000.00)],AND[ca_state IN ('GA', 'KS', 'MD'),(store_sales.ss_net_profit >= 150.00),(store_sales.ss_net_profit <= 3000.00)],AND[ca_state IN ('CO', 'MN', 'NC'),(store_sales.ss_net_profit >= 50.00)]]) build RFs:RF1 ca_address_sk->[ss_addr_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=(OR[AND[(customer_demographics.cd_marital_status = 'S'),(customer_demographics.cd_education_status = 'Secondary'),(store_sales.ss_sales_price >= 100.00),(store_sales.ss_sales_price <= 150.00)],AND[(customer_demographics.cd_marital_status = 'M'),(customer_demographics.cd_education_status = '2 yr Degree'),(store_sales.ss_sales_price <= 100.00)],AND[(customer_demographics.cd_marital_status = 'D'),(customer_demographics.cd_education_status = 'Advanced Degree'),(store_sales.ss_sales_price >= 150.00)]]) build RFs:RF0 cd_demo_sk->[ss_cdemo_sk] +------------------------PhysicalProject +--------------------------filter((store_sales.ss_net_profit <= 25000.00) and (store_sales.ss_net_profit >= 0.00) and (store_sales.ss_sales_price <= 200.00) and 
(store_sales.ss_sales_price >= 50.00)) +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------PhysicalProject +--------------------------filter(OR[AND[(customer_demographics.cd_marital_status = 'S'),(customer_demographics.cd_education_status = 'Secondary')],AND[(customer_demographics.cd_marital_status = 'M'),(customer_demographics.cd_education_status = '2 yr Degree')],AND[(customer_demographics.cd_marital_status = 'D'),(customer_demographics.cd_education_status = 'Advanced Degree')]] and cd_education_status IN ('2 yr Degree', 'Advanced Degree', 'Secondary') and cd_marital_status IN ('D', 'M', 'S')) +----------------------------PhysicalOlapScan[customer_demographics] +--------------------PhysicalProject +----------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('CO', 'GA', 'KS', 'MD', 'MN', 'NC', 'ND', 'NY', 'SD')) +------------------------PhysicalOlapScan[customer_address] +----------------PhysicalProject +------------------filter((date_dim.d_year = 2001)) +--------------------PhysicalOlapScan[date_dim] +------------PhysicalProject +--------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query49.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query49.out new file mode 100644 index 00000000000000..2c3bc534c872d3 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query49.out @@ -0,0 +1,107 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_49 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------PhysicalTopN[MERGE_SORT] +--------------------PhysicalDistribute[DistributionSpecGather] +----------------------PhysicalTopN[LOCAL_SORT] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------filter(OR[(web.return_rank <= 10),(web.currency_rank <= 10)]) +----------------------------------PhysicalWindow +------------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------------PhysicalWindow +----------------------------------------PhysicalQuickSort[MERGE_SORT] +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------------------------PhysicalProject +------------------------------------------------hashAgg[GLOBAL] +--------------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------------hashAgg[LOCAL] +------------------------------------------------------PhysicalProject +--------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws.ws_item_sk = wr.wr_item_sk) and (ws.ws_order_number = wr.wr_order_number)) otherCondition=() build RFs:RF1 ws_order_number->[wr_order_number];RF2 ws_item_sk->[wr_item_sk] +----------------------------------------------------------PhysicalProject 
+------------------------------------------------------------filter((wr.wr_return_amt > 10000.00)) +--------------------------------------------------------------PhysicalOlapScan[web_returns(wr)] apply RFs: RF1 RF2 +----------------------------------------------------------PhysicalProject +------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------filter((ws.ws_net_paid > 0.00) and (ws.ws_net_profit > 1.00) and (ws.ws_quantity > 0)) +------------------------------------------------------------------PhysicalOlapScan[web_sales(ws)] apply RFs: RF0 +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------PhysicalTopN[MERGE_SORT] +--------------------PhysicalDistribute[DistributionSpecGather] +----------------------PhysicalTopN[LOCAL_SORT] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------filter(OR[(catalog.return_rank <= 10),(catalog.currency_rank <= 10)]) +----------------------------------PhysicalWindow +------------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------------PhysicalWindow +----------------------------------------PhysicalQuickSort[MERGE_SORT] +------------------------------------------PhysicalDistribute[DistributionSpecGather] 
+--------------------------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------------------------PhysicalProject +------------------------------------------------hashAgg[GLOBAL] +--------------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------------hashAgg[LOCAL] +------------------------------------------------------PhysicalProject +--------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((cs.cs_item_sk = cr.cr_item_sk) and (cs.cs_order_number = cr.cr_order_number)) otherCondition=() build RFs:RF4 cs_order_number->[cr_order_number];RF5 cs_item_sk->[cr_item_sk] +----------------------------------------------------------PhysicalProject +------------------------------------------------------------filter((cr.cr_return_amount > 10000.00)) +--------------------------------------------------------------PhysicalOlapScan[catalog_returns(cr)] apply RFs: RF4 RF5 +----------------------------------------------------------PhysicalProject +------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------filter((cs.cs_net_paid > 0.00) and (cs.cs_net_profit > 1.00) and (cs.cs_quantity > 0)) +------------------------------------------------------------------PhysicalOlapScan[catalog_sales(cs)] apply RFs: RF3 +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------------------PhysicalOlapScan[date_dim] 
+----------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------PhysicalTopN[MERGE_SORT] +--------------------PhysicalDistribute[DistributionSpecGather] +----------------------PhysicalTopN[LOCAL_SORT] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------filter(OR[(store.return_rank <= 10),(store.currency_rank <= 10)]) +----------------------------------PhysicalWindow +------------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------------PhysicalWindow +----------------------------------------PhysicalQuickSort[MERGE_SORT] +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------------------------PhysicalProject +------------------------------------------------hashAgg[GLOBAL] +--------------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------------hashAgg[LOCAL] +------------------------------------------------------PhysicalProject +--------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((sts.ss_item_sk = sr.sr_item_sk) and (sts.ss_ticket_number = sr.sr_ticket_number)) otherCondition=() build RFs:RF7 ss_ticket_number->[sr_ticket_number];RF8 ss_item_sk->[sr_item_sk] +----------------------------------------------------------PhysicalProject +------------------------------------------------------------filter((sr.sr_return_amt > 10000.00)) +--------------------------------------------------------------PhysicalOlapScan[store_returns(sr)] apply RFs: RF7 RF8 +----------------------------------------------------------PhysicalProject 
+------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((sts.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------filter((sts.ss_net_paid > 0.00) and (sts.ss_net_profit > 1.00) and (sts.ss_quantity > 0)) +------------------------------------------------------------------PhysicalOlapScan[store_sales(sts)] apply RFs: RF6 +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1998)) +------------------------------------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out new file mode 100644 index 00000000000000..f4faa729b2052d --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out @@ -0,0 +1,76 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_5 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalRepeat +------------------PhysicalUnion +--------------------PhysicalProject +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((salesreturns.store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[sr_store_sk,ss_store_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((salesreturns.date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[sr_returned_date_sk,ss_sold_date_sk] +------------------------------------PhysicalUnion +--------------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF0 RF1 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_date <= '2000-09-02') and (date_dim.d_date >= '2000-08-19')) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store] +--------------------PhysicalProject 
+----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((salesreturns.page_sk = catalog_page.cp_catalog_page_sk)) otherCondition=() build RFs:RF3 cp_catalog_page_sk->[cr_catalog_page_sk,cs_catalog_page_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((salesreturns.date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cr_returned_date_sk,cs_sold_date_sk] +------------------------------------PhysicalUnion +--------------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +--------------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2 RF3 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_date <= '2000-09-02') and (date_dim.d_date >= '2000-08-19')) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_page] +--------------------PhysicalProject +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((salesreturns.wsr_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF7 web_site_sk->[ws_web_site_sk,ws_web_site_sk] 
+--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((salesreturns.date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[wr_returned_date_sk,ws_sold_date_sk] +------------------------------------PhysicalUnion +--------------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_returns.wr_item_sk = web_sales.ws_item_sk) and (web_returns.wr_order_number = web_sales.ws_order_number)) otherCondition=() build RFs:RF4 wr_item_sk->[ws_item_sk];RF5 wr_order_number->[ws_order_number] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 RF7 +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[web_returns] apply RFs: RF6 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_date <= '2000-09-02') and (date_dim.d_date >= '2000-08-19')) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_site] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query50.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query50.out new file mode 100644 index 00000000000000..d391b632b30f2e --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query50.out @@ -0,0 +1,29 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_50 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_customer_sk = store_returns.sr_customer_sk) and (store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF1 sr_ticket_number->[ss_ticket_number];RF2 sr_item_sk->[ss_item_sk];RF3 sr_customer_sk->[ss_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 RF5 +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_returned_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[sr_returned_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_returns] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------filter((d2.d_moy = 8) and (d2.d_year = 2001)) +----------------------------------PhysicalOlapScan[date_dim(d2)] +----------------------PhysicalProject +------------------------PhysicalOlapScan[date_dim(d1)] +------------------PhysicalProject 
+--------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query51.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query51.out new file mode 100644 index 00000000000000..5acf6623389d03 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query51.out @@ -0,0 +1,40 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_51 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------filter((y.web_cumulative > y.store_cumulative)) +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((web.d_date = store.d_date) and (web.item_sk = store.item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------hashAgg[GLOBAL] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------hashAgg[LOCAL] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_month_seq <= 1223) and (date_dim.d_month_seq >= 1212)) +------------------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject 
+------------------------PhysicalWindow +--------------------------PhysicalQuickSort[LOCAL_SORT] +----------------------------hashAgg[GLOBAL] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------hashAgg[LOCAL] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_month_seq <= 1223) and (date_dim.d_month_seq >= 1212)) +------------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query52.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query52.out new file mode 100644 index 00000000000000..5401ac3a92a539 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query52.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_52 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------PhysicalProject +--------------------------filter((item.i_manager_id = 1)) +----------------------------PhysicalOlapScan[item] +--------------------PhysicalProject +----------------------filter((dt.d_moy = 12) and (dt.d_year = 2000)) +------------------------PhysicalOlapScan[date_dim(dt)] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query53.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query53.out new file mode 100644 index 00000000000000..390906ce2f8165 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query53.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_53 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------filter(((cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / tmp1.avg_quarterly_sales) > 0.100000) and (tmp1.avg_quarterly_sales > 0.0000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar 
#1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query54.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query54.out new file mode 100644 index 00000000000000..e8e53738331886 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query54.out @@ -0,0 +1,74 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_54 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF16 d_date_sk->[ss_sold_date_sk];RF17 d_date_sk->[ss_sold_date_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((my_customers.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF14 c_customer_sk->[ss_customer_sk];RF15 c_customer_sk->[ss_customer_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF14 RF15 RF16 RF17 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_county = store.s_county) and (customer_address.ca_state = store.s_state)) otherCondition=() build RFs:RF10 s_county->[ca_county];RF11 s_county->[ca_county];RF12 s_state->[ca_state];RF13 s_state->[ca_state] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((my_customers.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF8 c_current_addr_sk->[ca_address_sk];RF9 c_current_addr_sk->[ca_address_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[customer_address] 
apply RFs: RF8 RF9 RF10 RF11 RF12 RF13 +----------------------------------------PhysicalProject +------------------------------------------hashAgg[GLOBAL] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_customer_sk = cs_or_ws_sales.customer_sk)) otherCondition=() build RFs:RF6 customer_sk->[c_customer_sk];RF7 customer_sk->[c_customer_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF6 RF7 +------------------------------------------------PhysicalProject +--------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs_or_ws_sales.sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[cs_sold_date_sk,ws_sold_date_sk];RF5 d_date_sk->[cs_sold_date_sk,ws_sold_date_sk] +----------------------------------------------------PhysicalProject +------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs_or_ws_sales.item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk,ws_item_sk];RF3 i_item_sk->[cs_item_sk,ws_item_sk] +--------------------------------------------------------PhysicalUnion +----------------------------------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------------------------------------------------PhysicalProject +--------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 RF4 RF5 +----------------------------------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +------------------------------------------------------------PhysicalProject +--------------------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 RF4 RF5 
+--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((item.i_category = 'Music') and (item.i_class = 'country')) +------------------------------------------------------------PhysicalOlapScan[item] +----------------------------------------------------PhysicalProject +------------------------------------------------------filter((date_dim.d_moy = 1) and (date_dim.d_year = 1999)) +--------------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store] +----------------------------PhysicalProject +------------------------------NestedLoopJoin[INNER_JOIN](cast(d_month_seq as BIGINT) <= d_month_seq+3) build RFs:RF1 d_month_seq+3->[cast(d_month_seq as BIGINT)] +--------------------------------PhysicalProject +----------------------------------NestedLoopJoin[INNER_JOIN](cast(d_month_seq as BIGINT) >= d_month_seq+1) build RFs:RF0 d_month_seq+1->[cast(d_month_seq as BIGINT)] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 RF1 +------------------------------------PhysicalAssertNumRows +--------------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((date_dim.d_moy = 1) and (date_dim.d_year = 1999)) +--------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalAssertNumRows +----------------------------------PhysicalDistribute[DistributionSpecGather] 
+------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------filter((date_dim.d_moy = 1) and (date_dim.d_year = 1999)) +----------------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query55.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query55.out new file mode 100644 index 00000000000000..b21e9d417e36dc --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query55.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_55 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------PhysicalProject +--------------------------filter((item.i_manager_id = 52)) +----------------------------PhysicalOlapScan[item] 
+--------------------PhysicalProject +----------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 2000)) +------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query56.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query56.out new file mode 100644 index 00000000000000..9c20cb7fbf24ec --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query56.out @@ -0,0 +1,83 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_56 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ss_addr_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_moy = 
3) and (date_dim.d_year = 2000)) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF0 i_item_id->[i_item_id] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[item] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter(i_color IN ('orchid', 'pink', 'powder')) +--------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalProject +------------------------------filter((customer_address.ca_gmt_offset = -6.00)) +--------------------------------PhysicalOlapScan[customer_address] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF7 ca_address_sk->[cs_bill_addr_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[cs_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 RF6 RF7 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2000)) 
+----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF4 i_item_id->[i_item_id] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[item] apply RFs: RF4 +----------------------------------PhysicalProject +------------------------------------filter(i_color IN ('orchid', 'pink', 'powder')) +--------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalProject +------------------------------filter((customer_address.ca_gmt_offset = -6.00)) +--------------------------------PhysicalOlapScan[customer_address] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF11 ws_bill_addr_sk->[ca_address_sk] +----------------------------PhysicalProject +------------------------------filter((customer_address.ca_gmt_offset = -6.00)) +--------------------------------PhysicalOlapScan[customer_address] apply RFs: RF11 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 i_item_sk->[ws_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[ws_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF9 RF10 
+------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2000)) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF8 i_item_id->[i_item_id] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[item] apply RFs: RF8 +----------------------------------PhysicalProject +------------------------------------filter(i_color IN ('orchid', 'pink', 'powder')) +--------------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query57.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query57.out new file mode 100644 index 00000000000000..add64f10604206 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query57.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_57 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------PhysicalWindow +--------PhysicalQuickSort[LOCAL_SORT] +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF1 cc_call_center_sk->[cs_call_center_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[call_center] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] +--PhysicalResultSink +----PhysicalProject +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject 
+--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((v1.cc_name = v1_lead.cc_name) and (v1.i_brand = v1_lead.i_brand) and (v1.i_category = v1_lead.i_category) and (v1.rn = expr_(rn - 1))) otherCondition=() build RFs:RF7 i_category->[i_category,i_category];RF8 i_brand->[i_brand,i_brand];RF9 cc_name->[cc_name,cc_name];RF10 expr_(rn - 1)->[(rn + 1),rn] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN shuffle] hashCondition=((v1.cc_name = v1_lag.cc_name) and (v1.i_brand = v1_lag.i_brand) and (v1.i_category = v1_lag.i_category) and (v1.rn = expr_(rn + 1))) otherCondition=() build RFs:RF3 i_category->[i_category];RF4 i_brand->[i_brand];RF5 cc_name->[cc_name];RF6 rn->[(rn + 1)] +--------------------PhysicalProject +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3 RF4 RF5 RF6 RF7 RF8 RF9 RF10 +--------------------filter(((cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / v2.avg_monthly_sales) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF7 RF8 RF9 RF10 +----------------PhysicalProject +------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query58.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query58.out new file mode 100644 index 00000000000000..5a44c1cc3d2ca6 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query58.out @@ -0,0 +1,86 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_58 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN colocated] hashCondition=((ss_items.item_id = ws_items.item_id)) otherCondition=((cast(cs_item_rev as DECIMALV3(38, 3)) <= (1.1 * ws_items.ws_item_rev)) and (cast(cs_item_rev as DECIMALV3(38, 3)) >= (0.9 * ws_items.ws_item_rev)) and (cast(ss_item_rev as DECIMALV3(38, 3)) <= (1.1 * ws_items.ws_item_rev)) and (cast(ss_item_rev as DECIMALV3(38, 3)) >= (0.9 * ws_items.ws_item_rev)) and (cast(ws_item_rev as DECIMALV3(38, 3)) <= (1.1 * cs_items.cs_item_rev)) and (cast(ws_item_rev as DECIMALV3(38, 3)) <= (1.1 * ss_items.ss_item_rev)) and (cast(ws_item_rev as DECIMALV3(38, 3)) >= (0.9 * cs_items.cs_item_rev)) and (cast(ws_item_rev as DECIMALV3(38, 3)) >= (0.9 * ss_items.ss_item_rev))) build RFs:RF13 item_id->[i_item_id] +------------PhysicalProject +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF12 i_item_sk->[ws_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF11 d_date_sk->[ws_sold_date_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF11 RF12 +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF10 d_date->[d_date] +--------------------------------PhysicalProject 
+----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF10 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF9 d_week_seq->[d_week_seq] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF9 +------------------------------------PhysicalAssertNumRows +--------------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_date = '2001-06-16')) +--------------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] apply RFs: RF13 +------------PhysicalProject +--------------hashJoin[INNER_JOIN colocated] hashCondition=((ss_items.item_id = cs_items.item_id)) otherCondition=((cast(cs_item_rev as DECIMALV3(38, 3)) <= (1.1 * ss_items.ss_item_rev)) and (cast(cs_item_rev as DECIMALV3(38, 3)) >= (0.9 * ss_items.ss_item_rev)) and (cast(ss_item_rev as DECIMALV3(38, 3)) <= (1.1 * cs_items.cs_item_rev)) and (cast(ss_item_rev as DECIMALV3(38, 3)) >= (0.9 * cs_items.cs_item_rev))) build RFs:RF8 item_id->[i_item_id] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[cs_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 
d_date_sk->[cs_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF6 RF7 +--------------------------------PhysicalProject +----------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5 +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF4 d_week_seq->[d_week_seq] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF4 +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------filter((date_dim.d_date = '2001-06-16')) +------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] apply RFs: RF8 +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 
d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +--------------------------------PhysicalProject +----------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF0 d_week_seq->[d_week_seq] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 +----------------------------------------PhysicalAssertNumRows +------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------PhysicalProject +----------------------------------------------filter((date_dim.d_date = '2001-06-16')) +------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query59.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query59.out new file mode 100644 index 00000000000000..050957da0f0a7a --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query59.out @@ -0,0 +1,45 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_59 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------PhysicalProject +----------------PhysicalOlapScan[date_dim] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN shuffle] hashCondition=((expr_cast(d_week_seq1 as BIGINT) = expr_(cast(d_week_seq2 as BIGINT) - 52)) and (y.s_store_id1 = x.s_store_id2)) otherCondition=() build RFs:RF5 s_store_id2->[s_store_id];RF6 expr_(cast(d_week_seq2 as BIGINT) - 52)->[cast(d_week_seq as BIGINT)] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((d.d_week_seq = d_week_seq1)) otherCondition=() build RFs:RF4 d_week_seq->[d_week_seq] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((wss.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +----------------------PhysicalProject +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3 RF4 RF6 +----------------------PhysicalProject +------------------------PhysicalOlapScan[store] apply RFs: RF5 +------------------PhysicalProject +--------------------filter((d.d_month_seq <= 1206) and (d.d_month_seq >= 1195)) +----------------------PhysicalOlapScan[date_dim(d)] 
+--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((d.d_week_seq = d_week_seq2)) otherCondition=() build RFs:RF2 d_week_seq->[d_week_seq] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((wss.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +----------------------PhysicalProject +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF1 RF2 +----------------------PhysicalProject +------------------------PhysicalOlapScan[store] +------------------PhysicalProject +--------------------filter((d.d_month_seq <= 1218) and (d.d_month_seq >= 1207)) +----------------------PhysicalOlapScan[date_dim(d)] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query6.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query6.out new file mode 100644 index 00000000000000..9eead41add48b7 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query6.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_6 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------filter((cnt >= 10)) +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((a.ca_address_sk = c.c_current_addr_sk)) otherCondition=() build RFs:RF5 c_current_addr_sk->[ca_address_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer_address(a)] apply RFs: RF5 +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((c.c_customer_sk = s.ss_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer(c)] apply RFs: RF4 +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((s.ss_item_sk = i.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((s.ss_sold_date_sk = d.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales(s)] apply RFs: RF2 RF3 +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d.d_month_seq = date_dim.d_month_seq)) otherCondition=() build RFs:RF1 d_month_seq->[d_month_seq] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim(d)] apply RFs: RF1 
+--------------------------------------PhysicalAssertNumRows +----------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------hashAgg[GLOBAL] +--------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------hashAgg[LOCAL] +------------------------------------------------PhysicalProject +--------------------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2002)) +----------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((j.i_category = i.i_category)) otherCondition=((cast(i_current_price as DECIMALV3(38, 5)) > (1.2 * avg(j.i_current_price)))) build RFs:RF0 i_category->[i_category] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[item(i)] apply RFs: RF0 +----------------------------------hashAgg[GLOBAL] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------hashAgg[LOCAL] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[item(j)] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query60.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query60.out new file mode 100644 index 00000000000000..f024124050e0f8 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query60.out @@ -0,0 +1,83 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_60 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[ss_addr_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_moy = 10) and (date_dim.d_year = 2000)) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalProject +----------------------------------filter((customer_address.ca_gmt_offset = -5.00)) +------------------------------------PhysicalOlapScan[customer_address] +----------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF0 i_item_id->[i_item_id] +------------------------------PhysicalProject 
+--------------------------------PhysicalOlapScan[item] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------filter((item.i_category = 'Jewelry')) +----------------------------------PhysicalOlapScan[item] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[cs_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF6 ca_address_sk->[cs_bill_addr_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 RF6 RF7 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_moy = 10) and (date_dim.d_year = 2000)) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalProject +----------------------------------filter((customer_address.ca_gmt_offset = -5.00)) +------------------------------------PhysicalOlapScan[customer_address] +----------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF4 i_item_id->[i_item_id] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[item] apply RFs: RF4 
+------------------------------PhysicalProject +--------------------------------filter((item.i_category = 'Jewelry')) +----------------------------------PhysicalOlapScan[item] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF11 ca_address_sk->[ws_bill_addr_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 i_item_sk->[ws_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[ws_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF9 RF10 RF11 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_moy = 10) and (date_dim.d_year = 2000)) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF8 i_item_id->[i_item_id] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[item] apply RFs: RF8 +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Jewelry')) +--------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalProject 
+------------------------------filter((customer_address.ca_gmt_offset = -5.00)) +--------------------------------PhysicalOlapScan[customer_address] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query61.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query61.out new file mode 100644 index 00000000000000..654f58923a6f2b --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query61.out @@ -0,0 +1,70 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_61 -- +PhysicalResultSink +--PhysicalTopN[GATHER_SORT] +----PhysicalProject +------NestedLoopJoin[CROSS_JOIN] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk] +------------------PhysicalProject +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF10 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] apply RFs: RF9 +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF8 s_store_sk->[ss_store_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk] 
+------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[ss_item_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7 RF8 +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 2000)) +------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] +------------------------------PhysicalProject +--------------------------------filter(OR[(promotion.p_channel_dmail = 'Y'),(promotion.p_channel_email = 'Y'),(promotion.p_channel_tv = 'Y')]) +----------------------------------PhysicalOlapScan[promotion] +--------------------------PhysicalProject +----------------------------filter((store.s_gmt_offset = -7.00)) +------------------------------PhysicalOlapScan[store] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF4 s_store_sk->[ss_store_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ss_customer_sk] 
+----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 2000)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Home')) +------------------------------PhysicalOlapScan[item] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[c_current_addr_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] apply RFs: RF0 +--------------------------PhysicalProject +----------------------------filter((customer_address.ca_gmt_offset = -7.00)) +------------------------------PhysicalOlapScan[customer_address] +------------------PhysicalProject +--------------------filter((store.s_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query62.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query62.out new file mode 100644 index 00000000000000..00e7e385d016fe --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query62.out @@ -0,0 +1,29 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_62 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_ship_mode_sk = ship_mode.sm_ship_mode_sk)) otherCondition=() build RFs:RF2 sm_ship_mode_sk->[ws_ship_mode_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF1 w_warehouse_sk->[ws_warehouse_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_ship_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_month_seq <= 1234) and (date_dim.d_month_seq >= 1223)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[warehouse] +----------------------PhysicalProject +------------------------PhysicalOlapScan[ship_mode] +------------------PhysicalProject +--------------------PhysicalOlapScan[web_site] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query63.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query63.out new file mode 
100644 index 00000000000000..e6acab66744562 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query63.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_63 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------filter(((cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / tmp1.avg_monthly_sales) > 0.100000) and (tmp1.avg_monthly_sales > 0.0000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg 
#7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query64.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query64.out new file mode 100644 index 00000000000000..13e1bc08953a35 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query64.out @@ -0,0 +1,102 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_64 -- +PhysicalCteAnchor ( cteId=CTEId#1 ) +--PhysicalCteProducer ( cteId=CTEId#1 ) +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF19 i_item_sk->[cr_item_sk,cs_item_sk,sr_item_sk,ss_item_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF18 ib_income_band_sk->[hd_income_band_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF17 ib_income_band_sk->[hd_income_band_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF16 ca_address_sk->[c_current_addr_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF15 ca_address_sk->[ss_addr_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[ss_hdemo_sk] +----------------------------------------PhysicalProject 
+------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF12 p_promo_sk->[ss_promo_sk] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF11 cd_demo_sk->[c_current_cdemo_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 cd_demo_sk->[ss_cdemo_sk] +----------------------------------------------------PhysicalProject +------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[c_first_shipto_date_sk] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF8 d_date_sk->[c_first_sales_date_sk] +------------------------------------------------------------PhysicalProject +--------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk] +----------------------------------------------------------------PhysicalProject +------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk] 
+--------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF4 sr_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk];RF5 sr_ticket_number->[ss_ticket_number] +------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF3 cs_item_sk->[ss_item_sk] +----------------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +--------------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF5 RF6 RF7 RF10 RF12 RF13 RF15 RF19 +--------------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------------filter(d_year IN (1999, 2000)) +------------------------------------------------------------------------------------PhysicalOlapScan[date_dim(d1)] +----------------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------------filter((sale > (2 * refund))) +--------------------------------------------------------------------------------hashAgg[GLOBAL] 
+----------------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number] +------------------------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF4 RF19 +------------------------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF4 RF19 +------------------------------------------------------------------------PhysicalProject +--------------------------------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF19 +--------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------PhysicalOlapScan[store] +----------------------------------------------------------------PhysicalProject +------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF8 RF9 RF11 RF14 RF16 +------------------------------------------------------------PhysicalProject +--------------------------------------------------------------PhysicalOlapScan[date_dim(d2)] 
+--------------------------------------------------------PhysicalProject +----------------------------------------------------------PhysicalOlapScan[date_dim(d3)] +----------------------------------------------------PhysicalProject +------------------------------------------------------PhysicalOlapScan[customer_demographics(cd1)] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[customer_demographics(cd2)] +--------------------------------------------PhysicalProject +----------------------------------------------PhysicalOlapScan[promotion] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[household_demographics(hd1)] apply RFs: RF17 +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[household_demographics(hd2)] apply RFs: RF18 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer_address(ad1)] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer_address(ad2)] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[income_band(ib1)] +--------------------PhysicalProject +----------------------PhysicalOlapScan[income_band(ib2)] +----------------PhysicalProject +------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) +--------------------PhysicalOlapScan[item] +--PhysicalResultSink +----PhysicalQuickSort[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalQuickSort[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN shuffle] hashCondition=((cs1.item_sk = cs2.item_sk) and (cs1.store_name = cs2.store_name) and (cs1.store_zip = cs2.store_zip)) otherCondition=((cs2.cnt <= cs1.cnt)) build 
RFs:RF20 item_sk->[item_sk];RF21 store_name->[store_name];RF22 store_zip->[store_zip] +--------------PhysicalProject +----------------filter((cs1.syear = 1999)) +------------------PhysicalCteConsumer ( cteId=CTEId#1 ) apply RFs: RF20 RF21 RF22 +--------------PhysicalProject +----------------filter((cs2.syear = 2000)) +------------------PhysicalCteConsumer ( cteId=CTEId#1 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query65.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query65.out new file mode 100644 index 00000000000000..267b157be5ff10 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query65.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_65 -- +PhysicalResultSink +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(store.s_store_name,item.i_item_desc,sc.revenue) lazySlots:(item.i_brand,item.i_current_price,item.i_wholesale_cost)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((sb.ss_store_sk = sc.ss_store_sk)) otherCondition=((cast(revenue as DECIMALV3(38, 5)) <= (0.1 * sb.ave))) build RFs:RF4 ss_store_sk->[s_store_sk,ss_store_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = sc.ss_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((item.i_item_sk = sc.ss_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject 
+--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalLazyMaterializeOlapScan[item lazySlots:(item.i_current_price,item.i_wholesale_cost,item.i_brand)] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store] apply RFs: RF4 +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_month_seq <= 1187) and (date_dim.d_month_seq >= 1176)) +--------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query66.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query66.out new file mode 100644 index 00000000000000..fb7a3400881984 --- /dev/null +++ 
b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query66.out @@ -0,0 +1,61 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_66 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------PhysicalUnion +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF3 w_warehouse_sk->[ws_warehouse_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF2 t_time_sk->[ws_sold_time_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_ship_mode_sk = ship_mode.sm_ship_mode_sk)) otherCondition=() build RFs:RF0 sm_ship_mode_sk->[ws_ship_mode_sk] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------------------------PhysicalProject +--------------------------------------------filter(sm_carrier IN ('BOXBUNDLES', 'ORIENTAL')) 
+----------------------------------------------PhysicalOlapScan[ship_mode] +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_year = 2001)) +------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------PhysicalProject +------------------------------------filter((time_dim.t_time <= 71770) and (time_dim.t_time >= 42970)) +--------------------------------------PhysicalOlapScan[time_dim] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[warehouse] +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF7 w_warehouse_sk->[cs_warehouse_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF6 t_time_sk->[cs_sold_time_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_ship_mode_sk = ship_mode.sm_ship_mode_sk)) otherCondition=() build RFs:RF4 sm_ship_mode_sk->[cs_ship_mode_sk] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 RF5 RF6 RF7 +------------------------------------------PhysicalProject 
+--------------------------------------------filter(sm_carrier IN ('BOXBUNDLES', 'ORIENTAL')) +----------------------------------------------PhysicalOlapScan[ship_mode] +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_year = 2001)) +------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------PhysicalProject +------------------------------------filter((time_dim.t_time <= 71770) and (time_dim.t_time >= 42970)) +--------------------------------------PhysicalOlapScan[time_dim] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[warehouse] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query67.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query67.out new file mode 100644 index 00000000000000..b5f6486b18a89a --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query67.out @@ -0,0 +1,32 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_67 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------filter((dw2.rk <= 100)) +----------PhysicalWindow +------------PhysicalPartitionTopN +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalPartitionTopN +------------------PhysicalProject +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalRepeat +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_month_seq <= 1228) and (date_dim.d_month_seq >= 1217)) +--------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[item] + diff --git 
a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query68.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query68.out new file mode 100644 index 00000000000000..e25d34e80cdd66 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query68.out @@ -0,0 +1,40 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_68 -- +PhysicalResultSink +--PhysicalProject +----PhysicalLazyMaterialize[materializedSlots:(customer.c_last_name,current_addr.ca_city,dn.bought_city,dn.ss_ticket_number,dn.extended_price,dn.extended_tax,dn.list_price) lazySlots:(customer.c_first_name)] +------PhysicalTopN[MERGE_SORT] +--------PhysicalDistribute[DistributionSpecGather] +----------PhysicalTopN[LOCAL_SORT] +------------PhysicalProject +--------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] +----------------PhysicalProject +------------------PhysicalOlapScan[customer_address(current_addr)] apply RFs: RF5 +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +--------------------PhysicalProject +----------------------PhysicalLazyMaterializeOlapScan[customer lazySlots:(customer.c_first_name)] apply RFs: RF4 +--------------------PhysicalProject +----------------------hashAgg[GLOBAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------------PhysicalProject 
+------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +--------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalProject +--------------------------------------filter(s_city IN ('Fairview', 'Midway')) +----------------------------------------PhysicalOlapScan[store] +--------------------------------PhysicalProject +----------------------------------filter(OR[(household_demographics.hd_dep_count = 3),(household_demographics.hd_vehicle_count = 4)]) +------------------------------------PhysicalOlapScan[household_demographics] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query69.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query69.out new file mode 100644 index 00000000000000..175f24b8cbd6dd --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query69.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_69 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF6 c_customer_sk->[ss_customer_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 +------------------------PhysicalProject +--------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2002)) +----------------------------PhysicalOlapScan[date_dim] +--------------------hashJoin[RIGHT_ANTI_JOIN shuffle] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=() build RFs:RF4 c_customer_sk->[cs_ship_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 RF4 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2002)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) otherCondition=() build RFs:RF2 c_current_cdemo_sk->[cd_demo_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF2 +--------------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[c_current_addr_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer(c)] apply RFs: RF1 +--------------------------------PhysicalProject +----------------------------------filter(ca_state IN ('IL', 'ME', 'TX')) +------------------------------------PhysicalOlapScan[customer_address(ca)] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_moy <= 3) and (date_dim.d_moy >= 1) and (date_dim.d_year = 2002)) +------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query7.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query7.out new file mode 100644 index 00000000000000..2d63af9e61b19e --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query7.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_7 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF2 p_promo_sk->[ss_promo_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[ss_cdemo_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------------PhysicalProject +--------------------------------filter((customer_demographics.cd_education_status = 'College') and (customer_demographics.cd_gender = 'F') and (customer_demographics.cd_marital_status = 'W')) +----------------------------------PhysicalOlapScan[customer_demographics] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_year = 2001)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------filter(OR[(promotion.p_channel_email = 'N'),(promotion.p_channel_event = 'N')]) +--------------------------PhysicalOlapScan[promotion] 
+------------------PhysicalProject +--------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query70.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query70.out new file mode 100644 index 00000000000000..6f7209d31b58eb --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query70.out @@ -0,0 +1,44 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_70 -- +PhysicalResultSink +--PhysicalProject +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalRepeat +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF4 s_store_sk->[ss_store_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d1.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 RF4 +------------------------------------PhysicalProject +--------------------------------------filter((d1.d_month_seq <= 1231) and (d1.d_month_seq >= 1220)) +----------------------------------------PhysicalOlapScan[date_dim(d1)] +--------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((store.s_state = tmp1.s_state)) 
otherCondition=() build RFs:RF2 s_state->[s_state] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store] apply RFs: RF2 +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] +--------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------hashAgg[LOCAL] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_month_seq <= 1231) and (date_dim.d_month_seq >= 1220)) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------------------PhysicalProject +------------------------------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query71.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query71.out new file mode 100644 index 00000000000000..88a4aed8de2b2f --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query71.out @@ -0,0 +1,36 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_71 -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((tmp.time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF2 t_time_sk->[cs_sold_time_sk,ss_sold_time_sk,ws_sold_time_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk,ss_sold_date_sk,ws_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((tmp.sold_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk,ss_item_sk,ws_item_sk] +----------------------------PhysicalUnion +------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 +------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 +------------------------------PhysicalDistribute[DistributionSpecExecutionAny] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------PhysicalProject +------------------------------filter((item.i_manager_id = 1)) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter((date_dim.d_moy = 12) 
and (date_dim.d_year = 2002)) +----------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalProject +----------------------filter(t_meal_time IN ('breakfast', 'dinner')) +------------------------PhysicalOlapScan[time_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query72.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query72.out new file mode 100644 index 00000000000000..e3b47aad8ef6de --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query72.out @@ -0,0 +1,58 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_72 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[LEFT_OUTER_JOIN bucketShuffle] hashCondition=((catalog_returns.cr_item_sk = catalog_sales.cs_item_sk) and (catalog_returns.cr_order_number = catalog_sales.cs_order_number)) otherCondition=() +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((warehouse.w_warehouse_sk = inventory.inv_warehouse_sk)) otherCondition=() build RFs:RF8 w_warehouse_sk->[inv_warehouse_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = inventory.inv_item_sk) and (inventory.inv_date_sk = d2.d_date_sk)) otherCondition=((inventory.inv_quantity_on_hand < catalog_sales.cs_quantity)) build RFs:RF6 d_date_sk->[inv_date_sk];RF7 cs_item_sk->[inv_item_sk] +--------------------------PhysicalOlapScan[inventory] apply RFs: RF6 RF7 RF8 +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d1.d_week_seq = d2.d_week_seq)) otherCondition=() 
build RFs:RF5 d_week_seq->[d_week_seq] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[date_dim(d2)] apply RFs: RF5 +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[cs_item_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[LEFT_OUTER_JOIN broadcast] hashCondition=((catalog_sales.cs_promo_sk = promotion.p_promo_sk)) otherCondition=() +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk)) otherCondition=() build RFs:RF3 cd_demo_sk->[cs_bill_cdemo_sk] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[cs_bill_hdemo_sk] +----------------------------------------------PhysicalProject +------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_ship_date_sk = d3.d_date_sk) and (catalog_sales.cs_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk];RF1 d_date_sk->[cs_sold_date_sk] +--------------------------------------------------PhysicalProject +----------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 RF3 RF4 +--------------------------------------------------PhysicalProject +----------------------------------------------------NestedLoopJoin[INNER_JOIN](d3.d_date > days_add(d_date, 5)) +------------------------------------------------------PhysicalProject 
+--------------------------------------------------------PhysicalOlapScan[date_dim(d3)] +------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((d1.d_year = 1998)) +----------------------------------------------------------PhysicalOlapScan[date_dim(d1)] +----------------------------------------------PhysicalProject +------------------------------------------------filter((household_demographics.hd_buy_potential = '1001-5000')) +--------------------------------------------------PhysicalOlapScan[household_demographics] +------------------------------------------PhysicalProject +--------------------------------------------filter((customer_demographics.cd_marital_status = 'S')) +----------------------------------------------PhysicalOlapScan[customer_demographics] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[promotion] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[item] +----------------------PhysicalProject +------------------------PhysicalOlapScan[warehouse] +------------------PhysicalProject +--------------------PhysicalOlapScan[catalog_returns] + + + + group expression count exceeds memo_max_group_expression_size(10000) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query73.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query73.out new file mode 100644 index 00000000000000..6b13afc6aa87cd --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query73.out @@ -0,0 +1,32 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_73 -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((dj.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk] +------------PhysicalProject +--------------PhysicalOlapScan[customer] apply RFs: RF3 +------------filter((dj.cnt <= 5) and (dj.cnt >= 1)) +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (2000, 2001, 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------filter((store.s_county = 'Williamson County')) +--------------------------------PhysicalOlapScan[store] +------------------------PhysicalProject +--------------------------filter(((cast(hd_dep_count as 
DOUBLE) / cast(hd_vehicle_count as DOUBLE)) > 1.0) and (household_demographics.hd_vehicle_count > 0) and hd_buy_potential IN ('1001-5000', '5001-10000')) +----------------------------PhysicalOlapScan[household_demographics] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query74.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query74.out new file mode 100644 index 00000000000000..1c089f592aeb68 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query74.out @@ -0,0 +1,63 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_74 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalUnion +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------PhysicalProject +------------------------filter(d_year IN (1999, 2000)) +--------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------PhysicalOlapScan[customer] +------PhysicalProject +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject 
+----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ws_bill_customer_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 +----------------------PhysicalProject +------------------------filter(d_year IN (1999, 2000)) +--------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------PhysicalOlapScan[customer] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8)) / year_total), NULL) > if((year_total > 0.00), (cast(year_total as DECIMALV3(13, 8)) / year_total), NULL))) build RFs:RF6 customer_id->[customer_id,customer_id,customer_id] +--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() build RFs:RF5 customer_id->[customer_id,customer_id] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN shuffle] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=() build RFs:RF4 customer_id->[customer_id] +--------------------PhysicalProject +----------------------filter((t_s_firstyear.sale_type = 's') and (t_s_firstyear.year = 1999) and 
(t_s_firstyear.year_total > 0.00)) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 RF5 RF6 +--------------------PhysicalProject +----------------------filter((t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.00)) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 RF6 +----------------PhysicalProject +------------------filter((t_s_secyear.sale_type = 's') and (t_s_secyear.year = 2000)) +--------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 +--------------PhysicalProject +----------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year = 2000)) +------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query75.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query75.out new file mode 100644 index 00000000000000..cb7767db6f41ad --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query75.out @@ -0,0 +1,68 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_75 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----hashAgg[GLOBAL] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalUnion +--------------PhysicalProject +----------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF2 cs_order_number->[cr_order_number];RF3 cs_item_sk->[cr_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2 RF3 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Sports')) +------------------------------PhysicalOlapScan[item] +----------------------PhysicalProject +------------------------filter(d_year IN (2001, 2002)) +--------------------------PhysicalOlapScan[date_dim] +--------------PhysicalProject +----------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF6 ss_ticket_number->[sr_ticket_number];RF7 ss_item_sk->[sr_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[store_returns] 
apply RFs: RF6 RF7 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5 +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Sports')) +------------------------------PhysicalOlapScan[item] +----------------------PhysicalProject +------------------------filter(d_year IN (2001, 2002)) +--------------------------PhysicalOlapScan[date_dim] +--------------PhysicalProject +----------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF10 ws_order_number->[wr_order_number];RF11 ws_item_sk->[wr_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[web_returns] apply RFs: RF10 RF11 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[ws_sold_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[ws_item_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF8 RF9 +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Sports')) +------------------------------PhysicalOlapScan[item] 
+----------------------PhysicalProject +------------------------filter(d_year IN (2001, 2002)) +--------------------------PhysicalOlapScan[date_dim] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN shuffle] hashCondition=((curr_yr.i_brand_id = prev_yr.i_brand_id) and (curr_yr.i_category_id = prev_yr.i_category_id) and (curr_yr.i_class_id = prev_yr.i_class_id) and (curr_yr.i_manufact_id = prev_yr.i_manufact_id)) otherCondition=(((cast(cast(sales_cnt as DECIMALV3(17, 2)) as DECIMALV3(23, 8)) / cast(sales_cnt as DECIMALV3(17, 2))) < 0.900000)) build RFs:RF12 i_brand_id->[i_brand_id];RF13 i_class_id->[i_class_id];RF14 i_category_id->[i_category_id];RF15 i_manufact_id->[i_manufact_id] +--------------filter((curr_yr.d_year = 2002)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF12 RF13 RF14 RF15 +--------------filter((prev_yr.d_year = 2001)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query76.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query76.out new file mode 100644 index 00000000000000..13989b9aed9734 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query76.out @@ -0,0 +1,38 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_76 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[cs_sold_date_sk,ss_sold_date_sk,ws_sold_date_sk] +------------------PhysicalUnion +--------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +--------------------------PhysicalProject +----------------------------filter(ss_customer_sk IS NULL) +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF3 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[item] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 ws_item_sk->[i_item_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] apply RFs: RF1 +------------------------PhysicalProject +--------------------------filter(ws_promo_sk IS NULL) +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF3 +--------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk] +--------------------------PhysicalProject +----------------------------filter(cs_bill_customer_sk IS NULL) 
+------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[item] +------------------PhysicalProject +--------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query77.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query77.out new file mode 100644 index 00000000000000..3659671c869dc8 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query77.out @@ -0,0 +1,101 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_77 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalRepeat +------------------PhysicalUnion +--------------------PhysicalProject +----------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((ss.s_store_sk = sr.s_store_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject 
+------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_date <= '2000-09-09') and (date_dim.d_date >= '2000-08-10')) +--------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store] +------------------------PhysicalProject +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[sr_store_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[sr_returned_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF0 RF1 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_date <= '2000-09-09') and (date_dim.d_date >= '2000-08-10')) +--------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store] +--------------------PhysicalProject +----------------------NestedLoopJoin[CROSS_JOIN] +------------------------PhysicalProject +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] 
+--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_date <= '2000-09-09') and (date_dim.d_date >= '2000-08-10')) +----------------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_returns.cr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[cr_returned_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF4 +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_date <= '2000-09-09') and (date_dim.d_date >= '2000-08-10')) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalProject +----------------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((ws.wp_web_page_sk = wr.wp_web_page_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_page_sk = 
web_page.wp_web_page_sk)) otherCondition=() build RFs:RF9 wp_web_page_sk->[ws_web_page_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF8 d_date_sk->[ws_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF8 RF9 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_date <= '2000-09-09') and (date_dim.d_date >= '2000-08-10')) +--------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_page] +------------------------PhysicalProject +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_returns.wr_web_page_sk = web_page.wp_web_page_sk)) otherCondition=() build RFs:RF7 wp_web_page_sk->[wr_web_page_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_returns.wr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[wr_returned_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[web_returns] apply RFs: RF6 RF7 +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_date <= '2000-09-09') and (date_dim.d_date >= '2000-08-10')) +--------------------------------------------PhysicalOlapScan[date_dim] 
+------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_page] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out new file mode 100644 index 00000000000000..a3b55973d303c7 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out @@ -0,0 +1,57 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_78 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------filter(OR[(coalesce(ws_qty, 0) > 0),(coalesce(cs_qty, 0) > 0)]) +------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((cs.cs_customer_sk = ss.ss_customer_sk) and (cs.cs_item_sk = ss.ss_item_sk) and (cs.cs_sold_year = ss.ss_sold_year)) otherCondition=() +--------------PhysicalProject +----------------hashJoin[LEFT_OUTER_JOIN colocated] hashCondition=((ws.ws_customer_sk = ss.ss_customer_sk) and (ws.ws_item_sk = ss.ss_item_sk) and (ws.ws_sold_year = ss.ss_sold_year)) otherCondition=() +------------------PhysicalProject +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=() +----------------------------------PhysicalProject 
+------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_returns] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_year = 1998)) +----------------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------hashJoin[LEFT_ANTI_JOIN shuffle] hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and (web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=() +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_year = 1998)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_returns] +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] 
hashCondition=((catalog_returns.cr_order_number = catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)) otherCondition=() +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_returns] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_year = 1998)) +------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query79.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query79.out new file mode 100644 index 00000000000000..605562961aa019 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query79.out @@ -0,0 +1,32 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_79 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((ms.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ss_customer_sk] +------------PhysicalProject +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[ss_hdemo_sk] +----------------------------PhysicalProject 
+------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_dow = 1) and d_year IN (2000, 2001, 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------filter(OR[(household_demographics.hd_dep_count = 7),(household_demographics.hd_vehicle_count > -1)]) +--------------------------------PhysicalOlapScan[household_demographics] +------------------------PhysicalProject +--------------------------filter((store.s_number_employees <= 295) and (store.s_number_employees >= 200)) +----------------------------PhysicalOlapScan[store] +------------PhysicalProject +--------------PhysicalOlapScan[customer] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query8.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query8.out new file mode 100644 index 00000000000000..837e29a358bace --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query8.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_8 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((expr_substring(s_zip, 1, 2) = expr_substring(ca_zip, 1, 2))) otherCondition=() +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_qoy = 2) and (date_dim.d_year = 1998)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[store] +------------------PhysicalProject +--------------------PhysicalIntersect RFV2: RF3[ca_zip->substring(ca_zip, 1, 5)] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------filter(substring(ca_zip, 1, 5) IN ('10298', '10374', '10425', '11340', '11489', '11618', '11652', '11686', '11855', '11912', '12197', '12318', '12320', '12350', '13086', '13123', '13261', '13338', '13376', '13378', '13443', '13844', '13869', '13918', '14073', '14155', '14196', '14242', '14312', '14440', '14530', '14851', '15371', '15475', '15543', 
'15734', '15751', '15782', '15794', '16005', '16226', '16364', '16515', '16704', '16791', '16891', '17167', '17193', '17291', '17672', '17819', '17879', '17895', '18218', '18360', '18367', '18410', '18421', '18434', '18569', '18700', '18767', '18829', '18884', '19326', '19444', '19489', '19753', '19833', '19988', '20244', '20317', '20534', '20601', '20712', '21060', '21094', '21204', '21231', '21343', '21727', '21800', '21814', '22728', '22815', '22911', '23065', '23952', '24227', '24255', '24286', '24594', '24660', '24891', '24987', '25115', '25178', '25214', '25264', '25333', '25494', '25717', '25973', '26217', '26689', '27052', '27116', '27156', '27287', '27369', '27385', '27413', '27642', '27700', '28055', '28239', '28571', '28577', '28810', '29086', '29392', '29450', '29752', '29818', '30106', '30415', '30621', '31013', '31016', '31655', '31830', '32489', '32669', '32754', '32919', '32958', '32961', '33113', '33122', '33159', '33467', '33562', '33773', '33869', '34306', '34473', '34594', '34948', '34972', '35076', '35390', '35834', '35863', '35926', '36201', '36335', '36430', '36479', '37119', '37788', '37914', '38353', '38607', '38919', '39214', '39459', '39500', '39503', '40146', '40936', '40979', '41162', '41232', '41255', '41331', '41351', '41352', '41419', '41807', '41836', '41967', '42361', '43432', '43639', '43830', '43933', '44529', '45266', '45484', '45533', '45645', '45676', '45859', '46081', '46131', '46507', '47289', '47369', '47529', '47602', '47770', '48017', '48162', '48333', '48530', '48567', '49101', '49130', '49140', '49211', '49230', '49254', '49472', '50412', '50632', '50636', '50679', '50788', '51089', '51184', '51195', '51634', '51717', '51766', '51782', '51793', '51933', '52094', '52301', '52389', '52868', '53163', '53535', '53565', '54010', '54207', '54364', '54558', '54585', '55233', '55349', '56224', '56355', '56436', '56455', '56600', '56877', '57025', '57553', '57631', '57649', '57839', '58032', '58058', '58062', '58117', '58218', 
'58412', '58454', '58581', '59004', '59080', '59130', '59226', '59345', '59386', '59494', '59852', '60083', '60298', '60560', '60624', '60736', '61527', '61794', '61860', '61997', '62361', '62585', '62878', '63073', '63180', '63193', '63294', '63792', '63991', '64592', '65148', '65177', '65501', '66057', '66943', '67881', '67975', '67998', '68101', '68293', '68341', '68605', '68730', '68770', '68843', '68852', '68908', '69280', '69952', '69998', '70041', '70070', '70073', '70450', '71144', '71256', '71286', '71836', '71948', '71954', '71997', '72592', '72991', '73021', '73108', '73134', '73146', '73219', '73873', '74686', '75660', '75675', '75742', '75752', '77454', '77817', '78093', '78366', '79077', '79658', '80332', '80846', '81003', '81070', '81084', '81335', '81504', '81755', '81963', '82080', '82602', '82620', '83041', '83086', '83583', '83647', '83833', '83910', '83986', '84247', '84680', '84844', '84919', '85066', '85761', '86057', '86379', '86709', '88086', '88137', '88217', '89193', '89338', '90209', '90229', '90669', '91110', '91894', '92292', '92380', '92645', '92696', '93498', '94791', '94835', '94898', '95042', '95430', '95464', '95694', '96435', '96560', '97173', '97462', '98069', '98072', '98338', '98533', '98569', '98584', '98862', '99060', '99132')) +--------------------------------PhysicalOlapScan[customer_address] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------filter((cnt > 10)) +--------------------------------hashAgg[GLOBAL] +----------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------hashAgg[LOCAL] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() 
build RFs:RF0 ca_address_sk->[c_current_addr_sk] +------------------------------------------PhysicalProject +--------------------------------------------filter((customer.c_preferred_cust_flag = 'Y')) +----------------------------------------------PhysicalOlapScan[customer] apply RFs: RF0 +------------------------------------------PhysicalProject +--------------------------------------------filter(substring(ca_zip, 1, 5) IN ('10298', '10374', '10425', '11340', '11489', '11618', '11652', '11686', '11855', '11912', '12197', '12318', '12320', '12350', '13086', '13123', '13261', '13338', '13376', '13378', '13443', '13844', '13869', '13918', '14073', '14155', '14196', '14242', '14312', '14440', '14530', '14851', '15371', '15475', '15543', '15734', '15751', '15782', '15794', '16005', '16226', '16364', '16515', '16704', '16791', '16891', '17167', '17193', '17291', '17672', '17819', '17879', '17895', '18218', '18360', '18367', '18410', '18421', '18434', '18569', '18700', '18767', '18829', '18884', '19326', '19444', '19489', '19753', '19833', '19988', '20244', '20317', '20534', '20601', '20712', '21060', '21094', '21204', '21231', '21343', '21727', '21800', '21814', '22728', '22815', '22911', '23065', '23952', '24227', '24255', '24286', '24594', '24660', '24891', '24987', '25115', '25178', '25214', '25264', '25333', '25494', '25717', '25973', '26217', '26689', '27052', '27116', '27156', '27287', '27369', '27385', '27413', '27642', '27700', '28055', '28239', '28571', '28577', '28810', '29086', '29392', '29450', '29752', '29818', '30106', '30415', '30621', '31013', '31016', '31655', '31830', '32489', '32669', '32754', '32919', '32958', '32961', '33113', '33122', '33159', '33467', '33562', '33773', '33869', '34306', '34473', '34594', '34948', '34972', '35076', '35390', '35834', '35863', '35926', '36201', '36335', '36430', '36479', '37119', '37788', '37914', '38353', '38607', '38919', '39214', '39459', '39500', '39503', '40146', '40936', '40979', '41162', '41232', '41255', 
'41331', '41351', '41352', '41419', '41807', '41836', '41967', '42361', '43432', '43639', '43830', '43933', '44529', '45266', '45484', '45533', '45645', '45676', '45859', '46081', '46131', '46507', '47289', '47369', '47529', '47602', '47770', '48017', '48162', '48333', '48530', '48567', '49101', '49130', '49140', '49211', '49230', '49254', '49472', '50412', '50632', '50636', '50679', '50788', '51089', '51184', '51195', '51634', '51717', '51766', '51782', '51793', '51933', '52094', '52301', '52389', '52868', '53163', '53535', '53565', '54010', '54207', '54364', '54558', '54585', '55233', '55349', '56224', '56355', '56436', '56455', '56600', '56877', '57025', '57553', '57631', '57649', '57839', '58032', '58058', '58062', '58117', '58218', '58412', '58454', '58581', '59004', '59080', '59130', '59226', '59345', '59386', '59494', '59852', '60083', '60298', '60560', '60624', '60736', '61527', '61794', '61860', '61997', '62361', '62585', '62878', '63073', '63180', '63193', '63294', '63792', '63991', '64592', '65148', '65177', '65501', '66057', '66943', '67881', '67975', '67998', '68101', '68293', '68341', '68605', '68730', '68770', '68843', '68852', '68908', '69280', '69952', '69998', '70041', '70070', '70073', '70450', '71144', '71256', '71286', '71836', '71948', '71954', '71997', '72592', '72991', '73021', '73108', '73134', '73146', '73219', '73873', '74686', '75660', '75675', '75742', '75752', '77454', '77817', '78093', '78366', '79077', '79658', '80332', '80846', '81003', '81070', '81084', '81335', '81504', '81755', '81963', '82080', '82602', '82620', '83041', '83086', '83583', '83647', '83833', '83910', '83986', '84247', '84680', '84844', '84919', '85066', '85761', '86057', '86379', '86709', '88086', '88137', '88217', '89193', '89338', '90209', '90229', '90669', '91110', '91894', '92292', '92380', '92645', '92696', '93498', '94791', '94835', '94898', '95042', '95430', '95464', '95694', '96435', '96560', '97173', '97462', '98069', '98072', '98338', '98533', '98569', 
'98584', '98862', '99060', '99132')) +----------------------------------------------PhysicalOlapScan[customer_address] RFV2: RF3 + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query80.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query80.out new file mode 100644 index 00000000000000..6661e34bc17afc --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query80.out @@ -0,0 +1,100 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_80 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalRepeat +------------------PhysicalUnion +--------------------PhysicalProject +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5 ss_ticket_number->[sr_ticket_number] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() 
build RFs:RF2 i_item_sk->[ss_item_sk] +----------------------------------------PhysicalProject +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------------------------------PhysicalProject +--------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14')) +----------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------------PhysicalProject +----------------------------------------------filter((promotion.p_channel_tv = 'N')) +------------------------------------------------PhysicalOlapScan[promotion] +----------------------------------------PhysicalProject +------------------------------------------filter((item.i_current_price > 50.00)) +--------------------------------------------PhysicalOlapScan[item] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store] +--------------------PhysicalProject +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_catalog_page_sk = catalog_page.cp_catalog_page_sk)) otherCondition=() build RFs:RF11 
cp_catalog_page_sk->[cs_catalog_page_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF9 cs_item_sk->[cr_item_sk];RF10 cs_order_number->[cr_order_number] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF9 RF10 +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[cs_item_sk] +----------------------------------------PhysicalProject +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[cs_promo_sk] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[cs_sold_date_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF6 RF7 RF8 RF11 +------------------------------------------------PhysicalProject +--------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14')) +----------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------------PhysicalProject +----------------------------------------------filter((promotion.p_channel_tv = 'N')) +------------------------------------------------PhysicalOlapScan[promotion] 
+----------------------------------------PhysicalProject +------------------------------------------filter((item.i_current_price > 50.00)) +--------------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_page] +--------------------PhysicalProject +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF16 ws_item_sk->[wr_item_sk];RF17 ws_order_number->[wr_order_number] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_returns] apply RFs: RF16 RF17 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF15 web_site_sk->[ws_web_site_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF14 i_item_sk->[ws_item_sk] +----------------------------------------PhysicalProject +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF13 p_promo_sk->[ws_promo_sk] +--------------------------------------------PhysicalProject +----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF12 d_date_sk->[ws_sold_date_sk] 
+------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF12 RF13 RF14 RF15 +------------------------------------------------PhysicalProject +--------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14')) +----------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------------PhysicalProject +----------------------------------------------filter((promotion.p_channel_tv = 'N')) +------------------------------------------------PhysicalOlapScan[promotion] +----------------------------------------PhysicalProject +------------------------------------------filter((item.i_current_price > 50.00)) +--------------------------------------------PhysicalOlapScan[item] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_site] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query81.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query81.out new file mode 100644 index 00000000000000..ddcb393e26ebad --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query81.out @@ -0,0 +1,41 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_81 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_returns.cr_returning_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cr_returning_addr_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_returns.cr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cr_returned_date_sk] +--------------------PhysicalProject +----------------------PhysicalOlapScan[catalog_returns] apply RFs: RF0 RF1 +--------------------PhysicalProject +----------------------filter((date_dim.d_year = 2001)) +------------------------PhysicalOlapScan[date_dim] +----------------PhysicalProject +------------------PhysicalOlapScan[customer_address] +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF4 
+----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] apply RFs: RF3 +------------------PhysicalProject +--------------------filter((customer_address.ca_state = 'TN')) +----------------------PhysicalOlapScan[customer_address] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalDistribute[DistributionSpecExecutionAny] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out new file mode 100644 index 00000000000000..a1bb3a33e1d0ff --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out @@ -0,0 +1,27 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_82 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[store_sales] apply RFs: RF2 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk] +--------------------------PhysicalProject 
+----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100)) +------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((item.i_current_price <= 88.00) and (item.i_current_price >= 58.00) and i_manufact_id IN (259, 485, 559, 580)) +------------------------------PhysicalOlapScan[item] +----------------------PhysicalProject +------------------------filter((date_dim.d_date <= '2001-03-14') and (date_dim.d_date >= '2001-01-13')) +--------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query83.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query83.out new file mode 100644 index 00000000000000..bb6216b2f45c7e --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query83.out @@ -0,0 +1,80 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_83 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN colocated] hashCondition=((sr_items.item_id = wr_items.item_id)) otherCondition=() build RFs:RF13 item_id->[i_item_id,i_item_id] +------------PhysicalProject +--------------hashJoin[INNER_JOIN colocated] hashCondition=((sr_items.item_id = cr_items.item_id)) otherCondition=() build RFs:RF12 item_id->[i_item_id] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF11 i_item_sk->[sr_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF10 d_date_sk->[sr_returned_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_returns] apply RFs: RF10 RF11 +--------------------------------PhysicalProject +----------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF9 d_date->[d_date] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF9 +------------------------------------PhysicalProject +--------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF8 d_week_seq->[d_week_seq] +----------------------------------------PhysicalProject 
+------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF8 +----------------------------------------PhysicalProject +------------------------------------------filter(d_date IN ('2001-07-13', '2001-09-10', '2001-11-16')) +--------------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] apply RFs: RF12 RF13 +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_returns.cr_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[cr_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_returns.cr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[cr_returned_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF6 RF7 +--------------------------------PhysicalProject +----------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5 +------------------------------------PhysicalProject +--------------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF4 d_week_seq->[d_week_seq] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF4 
+----------------------------------------PhysicalProject +------------------------------------------filter(d_date IN ('2001-07-13', '2001-09-10', '2001-11-16')) +--------------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] apply RFs: RF13 +------------PhysicalProject +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_returns.wr_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[wr_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_returns.wr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[wr_returned_date_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_returns] apply RFs: RF2 RF3 +----------------------------PhysicalProject +------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +--------------------------------PhysicalProject +----------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF0 d_week_seq->[d_week_seq] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 +------------------------------------PhysicalProject +--------------------------------------filter(d_date IN ('2001-07-13', '2001-09-10', '2001-11-16')) +----------------------------------------PhysicalOlapScan[date_dim] 
+------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query84.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query84.out new file mode 100644 index 00000000000000..46602027cd3688 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query84.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_84 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_cdemo_sk = customer_demographics.cd_demo_sk)) otherCondition=() build RFs:RF4 cd_demo_sk->[sr_cdemo_sk] +------------PhysicalProject +--------------PhysicalOlapScan[store_returns] apply RFs: RF4 +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk)) otherCondition=() build RFs:RF3 c_current_cdemo_sk->[cd_demo_sk] +----------------PhysicalProject +------------------PhysicalOlapScan[customer_demographics] apply RFs: RF3 +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((household_demographics.hd_demo_sk = customer.c_current_hdemo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[c_current_hdemo_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[c_current_addr_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[customer] apply RFs: RF1 RF2 +------------------------PhysicalProject +--------------------------filter((customer_address.ca_city = 'Woodland')) 
+----------------------------PhysicalOlapScan[customer_address] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((income_band.ib_income_band_sk = household_demographics.hd_income_band_sk)) otherCondition=() build RFs:RF0 ib_income_band_sk->[hd_income_band_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[household_demographics] apply RFs: RF0 +------------------------PhysicalProject +--------------------------filter((income_band.ib_lower_bound >= 60306) and (income_band.ib_upper_bound <= 110306)) +----------------------------PhysicalOlapScan[income_band] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query85.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query85.out new file mode 100644 index 00000000000000..27f904f559e587 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query85.out @@ -0,0 +1,46 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_85 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((reason.r_reason_sk = web_returns.wr_reason_sk)) otherCondition=() build RFs:RF9 r_reason_sk->[wr_reason_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cd1.cd_education_status = cd2.cd_education_status) and (cd1.cd_marital_status = cd2.cd_marital_status) and (cd2.cd_demo_sk = web_returns.wr_returning_cdemo_sk)) otherCondition=() build RFs:RF6 wr_returning_cdemo_sk->[cd_demo_sk];RF7 cd_marital_status->[cd_marital_status];RF8 cd_education_status->[cd_education_status] +------------------------PhysicalProject +--------------------------filter(cd_education_status IN ('Advanced Degree', 'College', 'Primary') and cd_marital_status IN ('D', 'S', 'U')) +----------------------------PhysicalOlapScan[customer_demographics(cd2)] apply RFs: RF6 RF7 RF8 +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_page_sk = web_page.wp_web_page_sk)) otherCondition=() build RFs:RF5 wp_web_page_sk->[ws_web_page_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = web_returns.wr_refunded_addr_sk)) otherCondition=(OR[AND[ca_state IN ('IA', 'NC', 'TX'),(web_sales.ws_net_profit >= 100.00),(web_sales.ws_net_profit <= 200.00)],AND[ca_state IN ('GA', 'WI', 'WV'),(web_sales.ws_net_profit >= 150.00)],AND[ca_state IN ('KY', 'OK', 'VA'),(web_sales.ws_net_profit <= 250.00)]]) build RFs:RF4 wr_refunded_addr_sk->[ca_address_sk] 
+--------------------------------PhysicalProject +----------------------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('GA', 'IA', 'KY', 'NC', 'OK', 'TX', 'VA', 'WI', 'WV')) +------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF4 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cd1.cd_demo_sk = web_returns.wr_refunded_cdemo_sk)) otherCondition=(OR[AND[(cd1.cd_marital_status = 'D'),(cd1.cd_education_status = 'Primary'),(web_sales.ws_sales_price >= 100.00),(web_sales.ws_sales_price <= 150.00)],AND[(cd1.cd_marital_status = 'S'),(cd1.cd_education_status = 'College'),(web_sales.ws_sales_price <= 100.00)],AND[(cd1.cd_marital_status = 'U'),(cd1.cd_education_status = 'Advanced Degree'),(web_sales.ws_sales_price >= 150.00)]]) build RFs:RF3 cd_demo_sk->[wr_refunded_cdemo_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and (web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF1 ws_item_sk->[wr_item_sk];RF2 ws_order_number->[wr_order_number] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[web_returns] apply RFs: RF1 RF2 RF3 RF9 +----------------------------------------PhysicalProject +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +--------------------------------------------PhysicalProject +----------------------------------------------filter((web_sales.ws_net_profit <= 300.00) and (web_sales.ws_net_profit >= 50.00) and (web_sales.ws_sales_price <= 200.00) and (web_sales.ws_sales_price >= 50.00)) 
+------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF5 +--------------------------------------------PhysicalProject +----------------------------------------------filter((date_dim.d_year = 1998)) +------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[(cd1.cd_marital_status = 'D'),(cd1.cd_education_status = 'Primary')],AND[(cd1.cd_marital_status = 'S'),(cd1.cd_education_status = 'College')],AND[(cd1.cd_marital_status = 'U'),(cd1.cd_education_status = 'Advanced Degree')]] and cd_education_status IN ('Advanced Degree', 'College', 'Primary') and cd_marital_status IN ('D', 'S', 'U')) +----------------------------------------PhysicalOlapScan[customer_demographics(cd1)] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_page] +--------------------PhysicalProject +----------------------PhysicalOlapScan[reason] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query86.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query86.out new file mode 100644 index 00000000000000..1633f90b97fbc0 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query86.out @@ -0,0 +1,28 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_86 -- +PhysicalResultSink +--PhysicalProject +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------PhysicalWindow +--------------PhysicalQuickSort[LOCAL_SORT] +----------------PhysicalDistribute[DistributionSpecHash] +------------------PhysicalProject +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------hashAgg[LOCAL] +--------------------------PhysicalRepeat +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ws_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((d1.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------------PhysicalProject +--------------------------------------filter((d1.d_month_seq <= 1197) and (d1.d_month_seq >= 1186)) +----------------------------------------PhysicalOlapScan[date_dim(d1)] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query87.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query87.out new file mode 100644 index 00000000000000..aa6a97e5abf73c --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query87.out @@ -0,0 +1,60 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_87 -- +PhysicalResultSink +--hashAgg[GLOBAL] +----PhysicalDistribute[DistributionSpecGather] +------hashAgg[LOCAL] +--------PhysicalProject +----------PhysicalExcept +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] 
+--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk] +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 +--------------------------PhysicalProject +----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[customer] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query88.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query88.out new file mode 100644 index 00000000000000..5da04ad61d3f42 --- /dev/null +++ 
b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query88.out @@ -0,0 +1,171 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_88 -- +PhysicalResultSink +--NestedLoopJoin[CROSS_JOIN] +----NestedLoopJoin[CROSS_JOIN] +------NestedLoopJoin[CROSS_JOIN] +--------NestedLoopJoin[CROSS_JOIN] +----------NestedLoopJoin[CROSS_JOIN] +------------NestedLoopJoin[CROSS_JOIN] +--------------NestedLoopJoin[CROSS_JOIN] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecGather] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF23 s_store_sk->[ss_store_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF22 hd_demo_sk->[ss_hdemo_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF21 t_time_sk->[ss_sold_time_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF21 RF22 RF23 +----------------------------------PhysicalProject +------------------------------------filter((time_dim.t_hour = 8) and (time_dim.t_minute >= 30)) +--------------------------------------PhysicalOlapScan[time_dim] +------------------------------PhysicalProject +--------------------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = -1),(household_demographics.hd_vehicle_count <= 
1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +----------------------------------PhysicalOlapScan[household_demographics] +--------------------------PhysicalProject +----------------------------filter((store.s_store_name = 'ese')) +------------------------------PhysicalOlapScan[store] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecGather] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF20 s_store_sk->[ss_store_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF19 hd_demo_sk->[ss_hdemo_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF18 t_time_sk->[ss_sold_time_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF18 RF19 RF20 +----------------------------------PhysicalProject +------------------------------------filter((time_dim.t_hour = 9) and (time_dim.t_minute < 30)) +--------------------------------------PhysicalOlapScan[time_dim] +------------------------------PhysicalProject +--------------------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = -1),(household_demographics.hd_vehicle_count <= 1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +----------------------------------PhysicalOlapScan[household_demographics] 
+--------------------------PhysicalProject +----------------------------filter((store.s_store_name = 'ese')) +------------------------------PhysicalOlapScan[store] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecGather] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF17 s_store_sk->[ss_store_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF16 hd_demo_sk->[ss_hdemo_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF15 t_time_sk->[ss_sold_time_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF15 RF16 RF17 +--------------------------------PhysicalProject +----------------------------------filter((time_dim.t_hour = 9) and (time_dim.t_minute >= 30)) +------------------------------------PhysicalOlapScan[time_dim] +----------------------------PhysicalProject +------------------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = -1),(household_demographics.hd_vehicle_count <= 1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +--------------------------------PhysicalOlapScan[household_demographics] +------------------------PhysicalProject +--------------------------filter((store.s_store_name = 'ese')) +----------------------------PhysicalOlapScan[store] +------------hashAgg[GLOBAL] 
+--------------PhysicalDistribute[DistributionSpecGather] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF14 s_store_sk->[ss_store_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[ss_hdemo_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF12 t_time_sk->[ss_sold_time_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF12 RF13 RF14 +------------------------------PhysicalProject +--------------------------------filter((time_dim.t_hour = 10) and (time_dim.t_minute < 30)) +----------------------------------PhysicalOlapScan[time_dim] +--------------------------PhysicalProject +----------------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = -1),(household_demographics.hd_vehicle_count <= 1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +------------------------------PhysicalOlapScan[household_demographics] +----------------------PhysicalProject +------------------------filter((store.s_store_name = 'ese')) +--------------------------PhysicalOlapScan[store] +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecGather] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
build RFs:RF11 s_store_sk->[ss_store_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF10 hd_demo_sk->[ss_hdemo_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF9 t_time_sk->[ss_sold_time_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store_sales] apply RFs: RF9 RF10 RF11 +----------------------------PhysicalProject +------------------------------filter((time_dim.t_hour = 10) and (time_dim.t_minute >= 30)) +--------------------------------PhysicalOlapScan[time_dim] +------------------------PhysicalProject +--------------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = -1),(household_demographics.hd_vehicle_count <= 1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +----------------------------PhysicalOlapScan[household_demographics] +--------------------PhysicalProject +----------------------filter((store.s_store_name = 'ese')) +------------------------PhysicalOlapScan[store] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF8 s_store_sk->[ss_store_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF7 hd_demo_sk->[ss_hdemo_sk] +----------------------PhysicalProject 
+------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF6 t_time_sk->[ss_sold_time_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 RF8 +--------------------------PhysicalProject +----------------------------filter((time_dim.t_hour = 11) and (time_dim.t_minute < 30)) +------------------------------PhysicalOlapScan[time_dim] +----------------------PhysicalProject +------------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = -1),(household_demographics.hd_vehicle_count <= 1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +--------------------------PhysicalOlapScan[household_demographics] +------------------PhysicalProject +--------------------filter((store.s_store_name = 'ese')) +----------------------PhysicalOlapScan[store] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF4 hd_demo_sk->[ss_hdemo_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF3 t_time_sk->[ss_sold_time_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales] apply RFs: RF3 RF4 RF5 +------------------------PhysicalProject 
+--------------------------filter((time_dim.t_hour = 11) and (time_dim.t_minute >= 30)) +----------------------------PhysicalOlapScan[time_dim] +--------------------PhysicalProject +----------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = -1),(household_demographics.hd_vehicle_count <= 1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +------------------------PhysicalOlapScan[household_demographics] +----------------PhysicalProject +------------------filter((store.s_store_name = 'ese')) +--------------------PhysicalOlapScan[store] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecGather] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[ss_hdemo_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF0 t_time_sk->[ss_sold_time_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------PhysicalProject +------------------------filter((time_dim.t_hour = 12) and (time_dim.t_minute < 30)) +--------------------------PhysicalOlapScan[time_dim] +------------------PhysicalProject +--------------------filter((household_demographics.hd_vehicle_count <= 5) and OR[AND[(household_demographics.hd_dep_count = 0),(household_demographics.hd_vehicle_count <= 2)],AND[(household_demographics.hd_dep_count = 
-1),(household_demographics.hd_vehicle_count <= 1)],(household_demographics.hd_dep_count = 3)] and hd_dep_count IN (-1, 0, 3)) +----------------------PhysicalOlapScan[household_demographics] +--------------PhysicalProject +----------------filter((store.s_store_name = 'ese')) +------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query89.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query89.out new file mode 100644 index 00000000000000..feceda97a51452 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query89.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_89 -- +PhysicalResultSink +--PhysicalProject +----PhysicalTopN[MERGE_SORT] +------PhysicalDistribute[DistributionSpecGather] +--------PhysicalTopN[LOCAL_SORT] +----------PhysicalProject +------------filter(( not (avg_monthly_sales = 0.0000)) and ((cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / tmp1.avg_monthly_sales) > 0.100000)) +--------------PhysicalWindow +----------------PhysicalQuickSort[LOCAL_SORT] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('audio', 'history', 'school-uniforms')],AND[i_category IN ('Men', 'Shoes', 'Sports'),i_class IN ('pants', 'tennis', 'womens')]] and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Shoes', 'Sports') and i_class IN ('audio', 'history', 'pants', 'school-uniforms', 'tennis', 'womens')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 2001)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query9.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query9.out new file mode 100644 index 00000000000000..06cd8f92785e08 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query9.out @@ -0,0 +1,115 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_9 -- +PhysicalResultSink +--PhysicalDistribute[DistributionSpecGather] +----PhysicalProject +------NestedLoopJoin[CROSS_JOIN] +--------NestedLoopJoin[CROSS_JOIN] +----------NestedLoopJoin[CROSS_JOIN] +------------NestedLoopJoin[CROSS_JOIN] +--------------NestedLoopJoin[CROSS_JOIN] +----------------NestedLoopJoin[CROSS_JOIN] +------------------NestedLoopJoin[CROSS_JOIN] +--------------------NestedLoopJoin[CROSS_JOIN] +----------------------NestedLoopJoin[CROSS_JOIN] +------------------------NestedLoopJoin[CROSS_JOIN] +--------------------------NestedLoopJoin[CROSS_JOIN] +----------------------------NestedLoopJoin[CROSS_JOIN] +------------------------------NestedLoopJoin[CROSS_JOIN] +--------------------------------NestedLoopJoin[CROSS_JOIN] +----------------------------------PhysicalProject +------------------------------------NestedLoopJoin[CROSS_JOIN] +--------------------------------------PhysicalProject +----------------------------------------filter((reason.r_reason_sk = 1)) +------------------------------------------PhysicalOlapScan[reason] +--------------------------------------hashAgg[GLOBAL] +----------------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------------hashAgg[LOCAL] +--------------------------------------------PhysicalProject +----------------------------------------------filter((store_sales.ss_quantity <= 20) and (store_sales.ss_quantity >= 1)) +------------------------------------------------PhysicalOlapScan[store_sales] +----------------------------------hashAgg[GLOBAL] +------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------hashAgg[LOCAL] +----------------------------------------PhysicalProject +------------------------------------------filter((store_sales.ss_quantity <= 20) and (store_sales.ss_quantity >= 1)) 
+--------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------hashAgg[GLOBAL] +----------------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------------hashAgg[LOCAL] +--------------------------------------PhysicalProject +----------------------------------------filter((store_sales.ss_quantity <= 20) and (store_sales.ss_quantity >= 1)) +------------------------------------------PhysicalOlapScan[store_sales] +------------------------------hashAgg[GLOBAL] +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------filter((store_sales.ss_quantity <= 40) and (store_sales.ss_quantity >= 21)) +----------------------------------------PhysicalOlapScan[store_sales] +----------------------------hashAgg[GLOBAL] +------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------hashAgg[LOCAL] +----------------------------------PhysicalProject +------------------------------------filter((store_sales.ss_quantity <= 40) and (store_sales.ss_quantity >= 21)) +--------------------------------------PhysicalOlapScan[store_sales] +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------filter((store_sales.ss_quantity <= 40) and (store_sales.ss_quantity >= 21)) +------------------------------------PhysicalOlapScan[store_sales] +------------------------hashAgg[GLOBAL] +--------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------hashAgg[LOCAL] +------------------------------PhysicalProject +--------------------------------filter((store_sales.ss_quantity <= 60) and 
(store_sales.ss_quantity >= 41)) +----------------------------------PhysicalOlapScan[store_sales] +----------------------hashAgg[GLOBAL] +------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------hashAgg[LOCAL] +----------------------------PhysicalProject +------------------------------filter((store_sales.ss_quantity <= 60) and (store_sales.ss_quantity >= 41)) +--------------------------------PhysicalOlapScan[store_sales] +--------------------hashAgg[GLOBAL] +----------------------PhysicalDistribute[DistributionSpecGather] +------------------------hashAgg[LOCAL] +--------------------------PhysicalProject +----------------------------filter((store_sales.ss_quantity <= 60) and (store_sales.ss_quantity >= 41)) +------------------------------PhysicalOlapScan[store_sales] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecGather] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------filter((store_sales.ss_quantity <= 80) and (store_sales.ss_quantity >= 61)) +----------------------------PhysicalOlapScan[store_sales] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecGather] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------filter((store_sales.ss_quantity <= 80) and (store_sales.ss_quantity >= 61)) +--------------------------PhysicalOlapScan[store_sales] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecGather] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------filter((store_sales.ss_quantity <= 80) and (store_sales.ss_quantity >= 61)) +------------------------PhysicalOlapScan[store_sales] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecGather] +----------------hashAgg[LOCAL] +------------------PhysicalProject 
+--------------------filter((store_sales.ss_quantity <= 100) and (store_sales.ss_quantity >= 81)) +----------------------PhysicalOlapScan[store_sales] +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecGather] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------filter((store_sales.ss_quantity <= 100) and (store_sales.ss_quantity >= 81)) +--------------------PhysicalOlapScan[store_sales] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------filter((store_sales.ss_quantity <= 100) and (store_sales.ss_quantity >= 81)) +------------------PhysicalOlapScan[store_sales] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query90.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query90.out new file mode 100644 index 00000000000000..e5f91ba2a61448 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query90.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_90 -- +PhysicalResultSink +--PhysicalTopN[GATHER_SORT] +----PhysicalProject +------NestedLoopJoin[CROSS_JOIN] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_page_sk = web_page.wp_web_page_sk)) otherCondition=() build RFs:RF5 wp_web_page_sk->[ws_web_page_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_ship_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF4 hd_demo_sk->[ws_ship_hdemo_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF3 t_time_sk->[ws_sold_time_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF3 RF4 RF5 +--------------------------PhysicalProject +----------------------------filter((time_dim.t_hour <= 13) and (time_dim.t_hour >= 12)) +------------------------------PhysicalOlapScan[time_dim] +----------------------PhysicalProject +------------------------filter((household_demographics.hd_dep_count = 6)) +--------------------------PhysicalOlapScan[household_demographics] +------------------PhysicalProject +--------------------filter((web_page.wp_char_count <= 5200) and (web_page.wp_char_count >= 5000)) +----------------------PhysicalOlapScan[web_page] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_web_page_sk = web_page.wp_web_page_sk)) otherCondition=() build RFs:RF2 wp_web_page_sk->[ws_web_page_sk] +------------------PhysicalProject 
+--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_ship_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[ws_ship_hdemo_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF0 t_time_sk->[ws_sold_time_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 +--------------------------PhysicalProject +----------------------------filter((time_dim.t_hour <= 15) and (time_dim.t_hour >= 14)) +------------------------------PhysicalOlapScan[time_dim] +----------------------PhysicalProject +------------------------filter((household_demographics.hd_dep_count = 6)) +--------------------------PhysicalOlapScan[household_demographics] +------------------PhysicalProject +--------------------filter((web_page.wp_char_count <= 5200) and (web_page.wp_char_count >= 5000)) +----------------------PhysicalOlapScan[web_page] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query91.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query91.out new file mode 100644 index 00000000000000..25542328573fbd --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query91.out @@ -0,0 +1,41 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_91 -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF5 cc_call_center_sk->[cr_call_center_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_returns.cr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[cr_returned_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_returns.cr_returning_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cr_returning_customer_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3 RF4 RF5 +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF2 c_current_addr_sk->[ca_address_sk] +--------------------------------PhysicalProject +----------------------------------filter((customer_address.ca_gmt_offset = -7.00)) +------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF2 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((household_demographics.hd_demo_sk = customer.c_current_hdemo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[c_current_hdemo_sk] +------------------------------------PhysicalProject 
+--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[c_current_cdemo_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[customer] apply RFs: RF0 RF1 +----------------------------------------PhysicalProject +------------------------------------------filter(OR[AND[(customer_demographics.cd_marital_status = 'M'),(customer_demographics.cd_education_status = 'Unknown')],AND[(customer_demographics.cd_marital_status = 'W'),(customer_demographics.cd_education_status = 'Advanced Degree')]] and cd_education_status IN ('Advanced Degree', 'Unknown') and cd_marital_status IN ('M', 'W')) +--------------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------------PhysicalProject +--------------------------------------filter((hd_buy_potential like 'Unknown%')) +----------------------------------------PhysicalOlapScan[household_demographics] +------------------------PhysicalProject +--------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 2000)) +----------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalProject +----------------------PhysicalOlapScan[call_center] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query92.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query92.out new file mode 100644 index 00000000000000..f44949b6b8cece --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query92.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_92 -- +PhysicalResultSink +--PhysicalTopN[GATHER_SORT] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecGather] +--------hashAgg[LOCAL] +----------PhysicalProject +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) +--------------PhysicalWindow +----------------PhysicalQuickSort[LOCAL_SORT] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ws_item_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +----------------------------PhysicalProject +------------------------------filter((item.i_manufact_id = 714)) +--------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------filter((date_dim.d_date <= '2000-05-01') and (date_dim.d_date >= '2000-02-01')) +----------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query93.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query93.out new file mode 100644 index 00000000000000..5a99fd09e06b91 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query93.out @@ -0,0 +1,21 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_93 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_returns.sr_item_sk = store_sales.ss_item_sk) and (store_returns.sr_ticket_number = store_sales.ss_ticket_number)) otherCondition=() build RFs:RF1 sr_item_sk->[ss_item_sk];RF2 sr_ticket_number->[ss_ticket_number] +------------------PhysicalProject +--------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_returns.sr_reason_sk = reason.r_reason_sk)) otherCondition=() build RFs:RF0 r_reason_sk->[sr_reason_sk] +----------------------PhysicalProject +------------------------PhysicalOlapScan[store_returns] apply RFs: RF0 +----------------------PhysicalProject +------------------------filter((reason.r_reason_desc = 'reason 58')) +--------------------------PhysicalOlapScan[reason] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query94.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query94.out new file mode 100644 index 00000000000000..78bc76f63ea747 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query94.out @@ -0,0 +1,35 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_94 -- +PhysicalResultSink +--PhysicalLimit[GLOBAL] +----PhysicalLimit[LOCAL] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 ws_order_number->[ws_order_number] +--------------------PhysicalProject +----------------------PhysicalOlapScan[web_sales(ws2)] apply RFs: RF4 +--------------------hashJoin[RIGHT_ANTI_JOIN shuffle] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() build RFs:RF3 ws_order_number->[wr_order_number] +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_returns(wr1)] apply RFs: RF3 +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF2 web_site_sk->[ws_web_site_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[web_sales(ws1)] apply RFs: RF0 RF1 RF2 +----------------------------------PhysicalProject +------------------------------------filter((customer_address.ca_state = 'OK')) 
+--------------------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '2002-06-30') and (date_dim.d_date >= '2002-05-01')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((web_site.web_company_name = 'pri')) +------------------------------PhysicalOlapScan[web_site] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query95.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query95.out new file mode 100644 index 00000000000000..058b68aa677160 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query95.out @@ -0,0 +1,44 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_95 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashJoin[INNER_JOIN shuffle] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF0 ws_order_number->[ws_order_number] +--------PhysicalProject +----------PhysicalOlapScan[web_sales(ws1)] apply RFs: RF0 RF7 +--------PhysicalProject +----------PhysicalOlapScan[web_sales(ws2)] apply RFs: RF7 +--PhysicalResultSink +----PhysicalLimit[GLOBAL] +------PhysicalLimit[LOCAL] +--------hashAgg[DISTINCT_GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[DISTINCT_LOCAL] +--------------hashAgg[GLOBAL] +----------------hashAgg[LOCAL] +------------------hashJoin[RIGHT_SEMI_JOIN colocated] hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF6 ws_order_number->[wr_order_number,ws_order_number] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_returns.wr_order_number = 
ws_wh.ws_order_number)) otherCondition=() build RFs:RF5 wr_order_number->[ws_order_number] +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 RF6 +------------------------PhysicalProject +--------------------------PhysicalOlapScan[web_returns] apply RFs: RF6 +--------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF7 ws_order_number->[ws_order_number,ws_order_number] +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_ship_date_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[web_sales(ws1)] apply RFs: RF1 RF2 RF3 +----------------------------------PhysicalProject +------------------------------------filter((customer_address.ca_state = 'VA')) +--------------------------------------PhysicalOlapScan[customer_address] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '2001-05-31') and (date_dim.d_date >= '2001-04-01')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((web_site.web_company_name = 'pri')) +------------------------------PhysicalOlapScan[web_site] + diff --git 
a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query96.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query96.out new file mode 100644 index 00000000000000..7e3d08cf2617a5 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query96.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_96 -- +PhysicalResultSink +--PhysicalLimit[GLOBAL] +----PhysicalLimit[LOCAL] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[ss_hdemo_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_time_sk = time_dim.t_time_sk)) otherCondition=() build RFs:RF0 t_time_sk->[ss_sold_time_sk] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------PhysicalProject +--------------------------filter((time_dim.t_hour = 8) and (time_dim.t_minute >= 30)) +----------------------------PhysicalOlapScan[time_dim] +--------------------PhysicalProject +----------------------filter((household_demographics.hd_dep_count = 0)) +------------------------PhysicalOlapScan[household_demographics] +----------------PhysicalProject +------------------filter((store.s_store_name = 'ese')) +--------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query97.out 
b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query97.out new file mode 100644 index 00000000000000..d3a845763241f7 --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query97.out @@ -0,0 +1,35 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_97 -- +PhysicalResultSink +--PhysicalLimit[GLOBAL] +----PhysicalLimit[LOCAL] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecGather] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[FULL_OUTER_JOIN colocated] hashCondition=((ssci.customer_sk = csci.customer_sk) and (ssci.item_sk = csci.item_sk)) otherCondition=() +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------PhysicalProject +------------------------------filter(( not ss_sold_date_sk IS NULL)) +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 +----------------------------PhysicalProject +------------------------------filter((date_dim.d_month_seq <= 1210) and (date_dim.d_month_seq >= 1199)) +--------------------------------PhysicalOlapScan[date_dim] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +----------------------------PhysicalProject +------------------------------filter(( not 
cs_sold_date_sk IS NULL)) +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 +----------------------------PhysicalProject +------------------------------filter((date_dim.d_month_seq <= 1210) and (date_dim.d_month_seq >= 1199)) +--------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query98.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query98.out new file mode 100644 index 00000000000000..beb47b1d23d9dd --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query98.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_98 -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '1999-03-07') and (date_dim.d_date >= '1999-02-05')) +----------------------------------PhysicalOlapScan[date_dim] 
+--------------------------PhysicalProject +----------------------------filter(i_category IN ('Jewelry', 'Men', 'Sports')) +------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query99.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query99.out new file mode 100644 index 00000000000000..de639b9015342e --- /dev/null +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query99.out @@ -0,0 +1,29 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_99 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_ship_mode_sk = ship_mode.sm_ship_mode_sk)) otherCondition=() build RFs:RF2 sm_ship_mode_sk->[cs_ship_mode_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF1 w_warehouse_sk->[cs_warehouse_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 RF3 +------------------------------PhysicalProject 
+--------------------------------filter((date_dim.d_month_seq <= 1205) and (date_dim.d_month_seq >= 1194)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[warehouse] +----------------------PhysicalProject +------------------------PhysicalOlapScan[ship_mode] +------------------PhysicalProject +--------------------PhysicalOlapScan[call_center] + diff --git a/regression-test/suites/shape_check/tpcds_sf1000/shape/query64.groovy b/regression-test/suites/shape_check/tpcds_sf1000/shape/query64.groovy index a253ba0c4d1671..ef89e0bd0a7bc5 100644 --- a/regression-test/suites/shape_check/tpcds_sf1000/shape/query64.groovy +++ b/regression-test/suites/shape_check/tpcds_sf1000/shape/query64.groovy @@ -155,125 +155,125 @@ order by cs1.product_name ,cs2.cnt ,cs1.s1 ,cs2.s1""" - qt_ds_shape_64 ''' - explain shape plan - with cs_ui as - (select cs_item_sk - ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund - from catalog_sales - ,catalog_returns - where cs_item_sk = cr_item_sk - and cs_order_number = cr_order_number - group by cs_item_sk - having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), -cross_sales as - (select i_product_name product_name - ,i_item_sk item_sk - ,s_store_name store_name - ,s_zip store_zip - ,ad1.ca_street_number b_street_number - ,ad1.ca_street_name b_street_name - ,ad1.ca_city b_city - ,ad1.ca_zip b_zip - ,ad2.ca_street_number c_street_number - ,ad2.ca_street_name c_street_name - ,ad2.ca_city c_city - ,ad2.ca_zip c_zip - ,d1.d_year as syear - ,d2.d_year as fsyear - ,d3.d_year s2year - ,count(*) cnt - ,sum(ss_wholesale_cost) s1 - ,sum(ss_list_price) s2 - ,sum(ss_coupon_amt) s3 - FROM store_sales - ,store_returns - ,cs_ui - ,date_dim d1 - ,date_dim d2 - ,date_dim d3 - ,store - ,customer - ,customer_demographics cd1 - ,customer_demographics cd2 - ,promotion - 
,household_demographics hd1 - ,household_demographics hd2 - ,customer_address ad1 - ,customer_address ad2 - ,income_band ib1 - ,income_band ib2 - ,item - WHERE ss_store_sk = s_store_sk AND - ss_sold_date_sk = d1.d_date_sk AND - ss_customer_sk = c_customer_sk AND - ss_cdemo_sk= cd1.cd_demo_sk AND - ss_hdemo_sk = hd1.hd_demo_sk AND - ss_addr_sk = ad1.ca_address_sk and - ss_item_sk = i_item_sk and - ss_item_sk = sr_item_sk and - ss_ticket_number = sr_ticket_number and - ss_item_sk = cs_ui.cs_item_sk and - c_current_cdemo_sk = cd2.cd_demo_sk AND - c_current_hdemo_sk = hd2.hd_demo_sk AND - c_current_addr_sk = ad2.ca_address_sk and - c_first_sales_date_sk = d2.d_date_sk and - c_first_shipto_date_sk = d3.d_date_sk and - ss_promo_sk = p_promo_sk and - hd1.hd_income_band_sk = ib1.ib_income_band_sk and - hd2.hd_income_band_sk = ib2.ib_income_band_sk and - cd1.cd_marital_status <> cd2.cd_marital_status and - i_color in ('orange','lace','lawn','misty','blush','pink') and - i_current_price between 48 and 48 + 10 and - i_current_price between 48 + 1 and 48 + 15 -group by i_product_name - ,i_item_sk - ,s_store_name - ,s_zip - ,ad1.ca_street_number - ,ad1.ca_street_name - ,ad1.ca_city - ,ad1.ca_zip - ,ad2.ca_street_number - ,ad2.ca_street_name - ,ad2.ca_city - ,ad2.ca_zip - ,d1.d_year - ,d2.d_year - ,d3.d_year -) -select cs1.product_name - ,cs1.store_name - ,cs1.store_zip - ,cs1.b_street_number - ,cs1.b_street_name - ,cs1.b_city - ,cs1.b_zip - ,cs1.c_street_number - ,cs1.c_street_name - ,cs1.c_city - ,cs1.c_zip - ,cs1.syear - ,cs1.cnt - ,cs1.s1 as s11 - ,cs1.s2 as s21 - ,cs1.s3 as s31 - ,cs2.s1 as s12 - ,cs2.s2 as s22 - ,cs2.s3 as s32 - ,cs2.syear - ,cs2.cnt -from cross_sales cs1,cross_sales cs2 -where cs1.item_sk=cs2.item_sk and - cs1.syear = 1999 and - cs2.syear = 1999 + 1 and - cs2.cnt <= cs1.cnt and - cs1.store_name = cs2.store_name and - cs1.store_zip = cs2.store_zip -order by cs1.product_name - ,cs1.store_name - ,cs2.cnt - ,cs1.s1 - ,cs2.s1 - ''' +// qt_ds_shape_64 ''' +// 
explain shape plan +// with cs_ui as +// (select cs_item_sk +// ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund +// from catalog_sales +// ,catalog_returns +// where cs_item_sk = cr_item_sk +// and cs_order_number = cr_order_number +// group by cs_item_sk +// having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +// cross_sales as +// (select i_product_name product_name +// ,i_item_sk item_sk +// ,s_store_name store_name +// ,s_zip store_zip +// ,ad1.ca_street_number b_street_number +// ,ad1.ca_street_name b_street_name +// ,ad1.ca_city b_city +// ,ad1.ca_zip b_zip +// ,ad2.ca_street_number c_street_number +// ,ad2.ca_street_name c_street_name +// ,ad2.ca_city c_city +// ,ad2.ca_zip c_zip +// ,d1.d_year as syear +// ,d2.d_year as fsyear +// ,d3.d_year s2year +// ,count(*) cnt +// ,sum(ss_wholesale_cost) s1 +// ,sum(ss_list_price) s2 +// ,sum(ss_coupon_amt) s3 +// FROM store_sales +// ,store_returns +// ,cs_ui +// ,date_dim d1 +// ,date_dim d2 +// ,date_dim d3 +// ,store +// ,customer +// ,customer_demographics cd1 +// ,customer_demographics cd2 +// ,promotion +// ,household_demographics hd1 +// ,household_demographics hd2 +// ,customer_address ad1 +// ,customer_address ad2 +// ,income_band ib1 +// ,income_band ib2 +// ,item +// WHERE ss_store_sk = s_store_sk AND +// ss_sold_date_sk = d1.d_date_sk AND +// ss_customer_sk = c_customer_sk AND +// ss_cdemo_sk= cd1.cd_demo_sk AND +// ss_hdemo_sk = hd1.hd_demo_sk AND +// ss_addr_sk = ad1.ca_address_sk and +// ss_item_sk = i_item_sk and +// ss_item_sk = sr_item_sk and +// ss_ticket_number = sr_ticket_number and +// ss_item_sk = cs_ui.cs_item_sk and +// c_current_cdemo_sk = cd2.cd_demo_sk AND +// c_current_hdemo_sk = hd2.hd_demo_sk AND +// c_current_addr_sk = ad2.ca_address_sk and +// c_first_sales_date_sk = d2.d_date_sk and +// c_first_shipto_date_sk = d3.d_date_sk and +// ss_promo_sk = p_promo_sk and +// hd1.hd_income_band_sk = 
ib1.ib_income_band_sk and +// hd2.hd_income_band_sk = ib2.ib_income_band_sk and +// cd1.cd_marital_status <> cd2.cd_marital_status and +// i_color in ('orange','lace','lawn','misty','blush','pink') and +// i_current_price between 48 and 48 + 10 and +// i_current_price between 48 + 1 and 48 + 15 +// group by i_product_name +// ,i_item_sk +// ,s_store_name +// ,s_zip +// ,ad1.ca_street_number +// ,ad1.ca_street_name +// ,ad1.ca_city +// ,ad1.ca_zip +// ,ad2.ca_street_number +// ,ad2.ca_street_name +// ,ad2.ca_city +// ,ad2.ca_zip +// ,d1.d_year +// ,d2.d_year +// ,d3.d_year +// ) +// select cs1.product_name +// ,cs1.store_name +// ,cs1.store_zip +// ,cs1.b_street_number +// ,cs1.b_street_name +// ,cs1.b_city +// ,cs1.b_zip +// ,cs1.c_street_number +// ,cs1.c_street_name +// ,cs1.c_city +// ,cs1.c_zip +// ,cs1.syear +// ,cs1.cnt +// ,cs1.s1 as s11 +// ,cs1.s2 as s21 +// ,cs1.s3 as s31 +// ,cs2.s1 as s12 +// ,cs2.s2 as s22 +// ,cs2.s3 as s32 +// ,cs2.syear +// ,cs2.cnt +// from cross_sales cs1,cross_sales cs2 +// where cs1.item_sk=cs2.item_sk and +// cs1.syear = 1999 and +// cs2.syear = 1999 + 1 and +// cs2.cnt <= cs1.cnt and +// cs1.store_name = cs2.store_name and +// cs1.store_zip = cs2.store_zip +// order by cs1.product_name +// ,cs1.store_name +// ,cs2.cnt +// ,cs1.s1 +// ,cs2.s1 +// ''' } diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/load.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/load.groovy new file mode 100644 index 00000000000000..94ff5a97d8b216 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/load.groovy @@ -0,0 +1,2520 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("load") { + String database = context.config.getDbNameByFile(context.file) + sql "drop database if exists ${database}" + sql "create database ${database}" + sql "use ${database}" + + sql ''' + drop table if exists customer_demographics + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS customer_demographics ( + cd_demo_sk int not null, + cd_gender varchar(1), + cd_marital_status varchar(1), + cd_education_status varchar(20), + cd_purchase_estimate integer, + cd_credit_rating varchar(10), + cd_dep_count integer, + cd_dep_employed_count integer, + cd_dep_college_count integer + ) + DUPLICATE KEY(cd_demo_sk) + DISTRIBUTED BY HASH(cd_demo_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists reason + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS reason ( + r_reason_sk int not null, + r_reason_id varchar(16) not null, + r_reason_desc varchar(100) + ) + DUPLICATE KEY(r_reason_sk) + DISTRIBUTED BY HASH(r_reason_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists date_dim + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS date_dim ( + d_date_sk int not null, + d_date_id varchar(16) not null, + d_date datev2, + d_month_seq integer, + d_week_seq integer, + d_quarter_seq integer, + d_year integer, + d_dow integer, + d_moy integer, + d_dom integer, + d_qoy integer, + d_fy_year integer, + d_fy_quarter_seq 
integer, + d_fy_week_seq integer, + d_day_name varchar(9), + d_quarter_name varchar(6), + d_holiday varchar(1), + d_weekend varchar(1), + d_following_holiday varchar(1), + d_first_dom integer, + d_last_dom integer, + d_same_day_ly integer, + d_same_day_lq integer, + d_current_day varchar(1), + d_current_week varchar(1), + d_current_month varchar(1), + d_current_quarter varchar(1), + d_current_year varchar(1) + ) + DUPLICATE KEY(d_date_sk) + DISTRIBUTED BY HASH(d_date_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists warehouse + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS warehouse ( + w_warehouse_sk int not null, + w_warehouse_id varchar(16) not null, + w_warehouse_name varchar(20), + w_warehouse_sq_ft integer, + w_street_number varchar(10), + w_street_name varchar(60), + w_street_type varchar(15), + w_suite_number varchar(10), + w_city varchar(60), + w_county varchar(30), + w_state varchar(2), + w_zip varchar(10), + w_country varchar(20), + w_gmt_offset decimalv3(5,2) + ) + DUPLICATE KEY(w_warehouse_sk) + DISTRIBUTED BY HASH(w_warehouse_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists catalog_sales + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS catalog_sales ( + cs_sold_date_sk int, + cs_item_sk int not null, + cs_order_number int not null, + cs_sold_time_sk int, + cs_ship_date_sk int, + cs_bill_customer_sk int, + cs_bill_cdemo_sk int, + cs_bill_hdemo_sk int, + cs_bill_addr_sk int, + cs_ship_customer_sk int, + cs_ship_cdemo_sk int, + cs_ship_hdemo_sk int, + cs_ship_addr_sk int, + cs_call_center_sk int, + cs_catalog_page_sk int, + cs_ship_mode_sk int, + cs_warehouse_sk int, + cs_promo_sk int, + cs_quantity int, + cs_wholesale_cost decimalv3(7,2), + cs_list_price decimalv3(7,2), + cs_sales_price decimalv3(7,2), + cs_ext_discount_amt decimalv3(7,2), + cs_ext_sales_price decimalv3(7,2), + cs_ext_wholesale_cost decimalv3(7,2), + cs_ext_list_price decimalv3(7,2), + 
cs_ext_tax decimalv3(7,2), + cs_coupon_amt decimalv3(7,2), + cs_ext_ship_cost decimalv3(7,2), + cs_net_paid decimalv3(7,2), + cs_net_paid_inc_tax decimalv3(7,2), + cs_net_paid_inc_ship decimalv3(7,2), + cs_net_paid_inc_ship_tax decimalv3(7,2), + cs_net_profit decimalv3(7,2) + ) + DUPLICATE KEY(`cs_sold_date_sk`, `cs_item_sk`, `cs_order_number`) + DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 261 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists call_center + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS call_center ( + cc_call_center_sk int not null, + cc_call_center_id varchar(16) not null, + cc_rec_start_date date, + cc_rec_end_date date, + cc_closed_date_sk integer, + cc_open_date_sk integer, + cc_name varchar(50), + cc_class varchar(50), + cc_employees integer, + cc_sq_ft integer, + cc_hours varchar(20), + cc_manager varchar(40), + cc_mkt_id integer, + cc_mkt_class varchar(50), + cc_mkt_desc varchar(100), + cc_market_manager varchar(40), + cc_division integer, + cc_division_name varchar(50), + cc_company integer, + cc_company_name varchar(50), + cc_street_number varchar(10), + cc_street_name varchar(60), + cc_street_type varchar(15), + cc_suite_number varchar(10), + cc_city varchar(60), + cc_county varchar(30), + cc_state varchar(2), + cc_zip varchar(10), + cc_country varchar(20), + cc_gmt_offset decimalv3(5,2), + cc_tax_percentage decimalv3(5,2) + ) + DUPLICATE KEY(cc_call_center_sk) + DISTRIBUTED BY HASH(cc_call_center_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists inventory + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS inventory ( + inv_date_sk int not null, + inv_item_sk int not null, + inv_warehouse_sk int, + inv_quantity_on_hand integer + ) + DUPLICATE KEY(inv_date_sk, inv_item_sk, inv_warehouse_sk) + DISTRIBUTED BY HASH(inv_item_sk) BUCKETS 63 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists catalog_returns + ''' + + 
sql ''' + CREATE TABLE IF NOT EXISTS catalog_returns ( + cr_returned_date_sk int, + cr_item_sk int not null, + cr_order_number int not null, + cr_returned_time_sk int, + cr_refunded_customer_sk int, + cr_refunded_cdemo_sk int, + cr_refunded_hdemo_sk int, + cr_refunded_addr_sk int, + cr_returning_customer_sk int, + cr_returning_cdemo_sk int, + cr_returning_hdemo_sk int, + cr_returning_addr_sk int, + cr_call_center_sk int, + cr_catalog_page_sk int, + cr_ship_mode_sk int, + cr_warehouse_sk int, + cr_reason_sk int, + cr_return_quantity integer, + cr_return_amount decimalv3(7,2), + cr_return_tax decimalv3(7,2), + cr_return_amt_inc_tax decimalv3(7,2), + cr_fee decimalv3(7,2), + cr_return_ship_cost decimalv3(7,2), + cr_refunded_cash decimalv3(7,2), + cr_reversed_charge decimalv3(7,2), + cr_store_credit decimalv3(7,2), + cr_net_loss decimalv3(7,2) + ) + DUPLICATE KEY(`cr_returned_date_sk`, `cr_item_sk`, `cr_order_number`) + DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 36 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists household_demographics + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS household_demographics ( + hd_demo_sk int not null, + hd_income_band_sk int, + hd_buy_potential varchar(15), + hd_dep_count integer, + hd_vehicle_count integer + ) + DUPLICATE KEY(hd_demo_sk) + DISTRIBUTED BY HASH(hd_demo_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists customer_address + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS customer_address ( + ca_address_sk int not null, + ca_address_id varchar(16) not null, + ca_street_number varchar(10), + ca_street_name varchar(60), + ca_street_type varchar(15), + ca_suite_number varchar(10), + ca_city varchar(60), + ca_county varchar(30), + ca_state varchar(2), + ca_zip varchar(10), + ca_country varchar(20), + ca_gmt_offset decimalv3(5,2), + ca_location_type varchar(20) + ) + DUPLICATE KEY(ca_address_sk) + DISTRIBUTED BY HASH(ca_address_sk) 
BUCKETS 18 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists income_band + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS income_band ( + ib_income_band_sk int not null, + ib_lower_bound integer, + ib_upper_bound integer + ) + DUPLICATE KEY(ib_income_band_sk) + DISTRIBUTED BY HASH(ib_income_band_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists catalog_page + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS catalog_page ( + cp_catalog_page_sk int not null, + cp_catalog_page_id varchar(16) not null, + cp_start_date_sk integer, + cp_end_date_sk integer, + cp_department varchar(50), + cp_catalog_number integer, + cp_catalog_page_number integer, + cp_description varchar(100), + cp_type varchar(100) + ) + DUPLICATE KEY(cp_catalog_page_sk) + DISTRIBUTED BY HASH(cp_catalog_page_sk) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists item + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS item ( + i_item_sk int not null, + i_item_id varchar(16) not null, + i_rec_start_date datev2, + i_rec_end_date datev2, + i_item_desc varchar(200), + i_current_price decimalv3(7,2), + i_wholesale_cost decimalv3(7,2), + i_brand_id integer, + i_brand varchar(50), + i_class_id integer, + i_class char(50), + i_category_id integer, + i_category varchar(50), + i_manufact_id integer, + i_manufact varchar(50), + i_size varchar(20), + i_formulation varchar(20), + i_color varchar(20), + i_units varchar(10), + i_container varchar(10), + i_manager_id integer, + i_product_name varchar(50) + ) + DUPLICATE KEY(i_item_sk) + DISTRIBUTED BY HASH(i_item_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_returns + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS web_returns ( + wr_returned_date_sk int, + wr_item_sk int not null, + wr_order_number int not null, + wr_returned_time_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk 
int, + wr_refunded_hdemo_sk int, + wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_return_quantity integer, + wr_return_amt decimalv3(7,2), + wr_return_tax decimalv3(7,2), + wr_return_amt_inc_tax decimalv3(7,2), + wr_fee decimalv3(7,2), + wr_return_ship_cost decimalv3(7,2), + wr_refunded_cash decimalv3(7,2), + wr_reversed_charge decimalv3(7,2), + wr_account_credit decimalv3(7,2), + wr_net_loss decimalv3(7,2) + ) + DUPLICATE KEY(`wr_returned_date_sk`, `wr_item_sk`, `wr_order_number`) + DISTRIBUTED BY HASH(`wr_item_sk`, `wr_order_number`) BUCKETS 18 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_site + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS web_site ( + web_site_sk int not null, + web_site_id varchar(16) not null, + web_rec_start_date datev2, + web_rec_end_date datev2, + web_name varchar(50), + web_open_date_sk int, + web_close_date_sk int, + web_class varchar(50), + web_manager varchar(40), + web_mkt_id integer, + web_mkt_class varchar(50), + web_mkt_desc varchar(100), + web_market_manager varchar(40), + web_company_id integer, + web_company_name varchar(50), + web_street_number varchar(10), + web_street_name varchar(60), + web_street_type varchar(15), + web_suite_number varchar(10), + web_city varchar(60), + web_county varchar(30), + web_state varchar(2), + web_zip varchar(10), + web_country varchar(20), + web_gmt_offset decimalv3(5,2), + web_tax_percentage decimalv3(5,2) + ) + DUPLICATE KEY(web_site_sk) + DISTRIBUTED BY HASH(web_site_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists promotion + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS promotion ( + p_promo_sk int not null, + p_promo_id varchar(16) not null, + p_start_date_sk int, + p_end_date_sk int, + p_item_sk int, + p_cost decimalv3(15,2), + p_response_targe integer, + 
p_promo_name varchar(50), + p_channel_dmail varchar(1), + p_channel_email varchar(1), + p_channel_catalog varchar(1), + p_channel_tv varchar(1), + p_channel_radio varchar(1), + p_channel_press varchar(1), + p_channel_event varchar(1), + p_channel_demo varchar(1), + p_channel_details varchar(100), + p_purpose varchar(15), + p_discount_active varchar(1) + ) + DUPLICATE KEY(p_promo_sk) + DISTRIBUTED BY HASH(p_promo_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_sales + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS web_sales ( + ws_sold_date_sk int, + ws_item_sk int not null, + ws_order_number int not null, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_quantity integer, + ws_wholesale_cost decimalv3(7,2), + ws_list_price decimalv3(7,2), + ws_sales_price decimalv3(7,2), + ws_ext_discount_amt decimalv3(7,2), + ws_ext_sales_price decimalv3(7,2), + ws_ext_wholesale_cost decimalv3(7,2), + ws_ext_list_price decimalv3(7,2), + ws_ext_tax decimalv3(7,2), + ws_coupon_amt decimalv3(7,2), + ws_ext_ship_cost decimalv3(7,2), + ws_net_paid decimalv3(7,2), + ws_net_paid_inc_tax decimalv3(7,2), + ws_net_paid_inc_ship decimalv3(7,2), + ws_net_paid_inc_ship_tax decimalv3(7,2), + ws_net_profit decimalv3(7,2) + ) + DUPLICATE KEY(`ws_sold_date_sk`, `ws_item_sk`, `ws_order_number`) + DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 126 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists store + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS store ( + s_store_sk int not null, + s_store_id varchar(16) not null, + s_rec_start_date datev2, + s_rec_end_date datev2, + s_closed_date_sk int, + 
s_store_name varchar(50), + s_number_employees integer, + s_floor_space integer, + s_hours varchar(20), + s_manager varchar(40), + s_market_id integer, + s_geography_class varchar(100), + s_market_desc varchar(100), + s_market_manager varchar(40), + s_division_id integer, + s_division_name varchar(50), + s_company_id integer, + s_company_name varchar(50), + s_street_number varchar(10), + s_street_name varchar(60), + s_street_type varchar(15), + s_suite_number varchar(10), + s_city varchar(60), + s_county varchar(30), + s_state varchar(2), + s_zip varchar(10), + s_country varchar(20), + s_gmt_offset decimalv3(5,2), + s_tax_percentage decimalv3(5,2) + ) + DUPLICATE KEY(s_store_sk) + DISTRIBUTED BY HASH(s_store_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists time_dim + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS time_dim ( + t_time_sk int not null, + t_time_id varchar(16) not null, + t_time integer, + t_hour integer, + t_minute integer, + t_second integer, + t_am_pm varchar(2), + t_shift varchar(20), + t_sub_shift varchar(20), + t_meal_time varchar(20) + ) + DUPLICATE KEY(t_time_sk) + DISTRIBUTED BY HASH(t_time_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_page + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS web_page ( + wp_web_page_sk int not null, + wp_web_page_id varchar(16) not null, + wp_rec_start_date datev2, + wp_rec_end_date datev2, + wp_creation_date_sk int, + wp_access_date_sk int, + wp_autogen_flag varchar(1), + wp_customer_sk int, + wp_url varchar(100), + wp_type varchar(50), + wp_char_count integer, + wp_link_count integer, + wp_image_count integer, + wp_max_ad_count integer + ) + DUPLICATE KEY(wp_web_page_sk) + DISTRIBUTED BY HASH(wp_web_page_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists store_returns + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS store_returns ( + sr_returned_date_sk int, + 
sr_item_sk int not null, + sr_ticket_number int not null, + sr_return_time_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_return_quantity integer, + sr_return_amt decimalv3(7,2), + sr_return_tax decimalv3(7,2), + sr_return_amt_inc_tax decimalv3(7,2), + sr_fee decimalv3(7,2), + sr_return_ship_cost decimalv3(7,2), + sr_refunded_cash decimalv3(7,2), + sr_reversed_charge decimalv3(7,2), + sr_store_credit decimalv3(7,2), + sr_net_loss decimalv3(7,2) + ) + duplicate key(`sr_returned_date_sk`, `sr_item_sk`, `sr_ticket_number`) + distributed by hash (sr_item_sk, sr_ticket_number) buckets 36 + properties ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists store_sales + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS store_sales ( + ss_sold_date_sk int, + ss_item_sk int not null, + ss_ticket_number int not null, + ss_sold_time_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_quantity integer, + ss_wholesale_cost decimalv3(7,2), + ss_list_price decimalv3(7,2), + ss_sales_price decimalv3(7,2), + ss_ext_discount_amt decimalv3(7,2), + ss_ext_sales_price decimalv3(7,2), + ss_ext_wholesale_cost decimalv3(7,2), + ss_ext_list_price decimalv3(7,2), + ss_ext_tax decimalv3(7,2), + ss_coupon_amt decimalv3(7,2), + ss_net_paid decimalv3(7,2), + ss_net_paid_inc_tax decimalv3(7,2), + ss_net_profit decimalv3(7,2) + ) + DUPLICATE KEY(`ss_sold_date_sk`, `ss_item_sk`, `ss_ticket_number`) + DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 261 + PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "store" + ) + ''' + + sql ''' + drop table if exists ship_mode + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS ship_mode ( + sm_ship_mode_sk int not null, + sm_ship_mode_id varchar(16) not null, + sm_type varchar(30), + sm_code varchar(10), + sm_carrier varchar(20), + sm_contract varchar(20) + ) + DUPLICATE 
KEY(sm_ship_mode_sk) + DISTRIBUTED BY HASH(sm_ship_mode_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists customer + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS customer ( + c_customer_sk int not null, + c_customer_id varchar(16) not null, + c_current_cdemo_sk int, + c_current_hdemo_sk int, + c_current_addr_sk int, + c_first_shipto_date_sk int, + c_first_sales_date_sk int, + c_salutation varchar(10), + c_first_name varchar(20), + c_last_name varchar(30), + c_preferred_cust_flag varchar(1), + c_birth_day integer, + c_birth_month integer, + c_birth_year integer, + c_birth_country varchar(20), + c_login varchar(13), + c_email_address varchar(50), + c_last_review_date_sk int + ) + DUPLICATE KEY(c_customer_sk) + DISTRIBUTED BY HASH(c_customer_sk) BUCKETS 18 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists dbgen_version + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS dbgen_version + ( + dv_version varchar(16) , + dv_create_date datev2 , + dv_create_time datetime , + dv_cmdline_args varchar(200) + ) + DUPLICATE KEY(dv_version) + DISTRIBUTED BY HASH(dv_version) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql """ + alter table customer_demographics modify column cd_dep_employed_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') + """ + + sql """ + alter table date_dim modify column d_day_name set stats ('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='Friday', 'max_value'='Wednesday', 'data_size'='521779') + """ + + sql """ + alter table date_dim modify column d_following_holiday set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_same_day_ly set stats ('row_count'='73049', 'ndv'='72450', 'num_nulls'='0', 'min_value'='2414657', 'max_value'='2487705', 
'data_size'='292196') + """ + + sql """ + alter table warehouse modify column w_city set stats ('row_count'='20', 'ndv'='12', 'num_nulls'='0', 'min_value'='Fairview', 'max_value'='Shiloh', 'data_size'='183') + """ + + sql """ + alter table warehouse modify column w_street_type set stats ('row_count'='20', 'ndv'='14', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='71') + """ + + sql """ + alter table catalog_sales modify column cs_call_center_sk set stats ('row_count'='1439980416', 'ndv'='42', 'num_nulls'='7199711', 'min_value'='1', 'max_value'='42', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid_inc_ship set stats ('row_count'='1439980416', 'ndv'='2505826', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='43956.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_sales_price set stats ('row_count'='1439980416', 'ndv'='29306', 'num_nulls'='7200276', 'min_value'='0.00', 'max_value'='300.00', 'data_size'='5759921664') + """ + + sql """ + alter table call_center modify column cc_class set stats ('row_count'='42', 'ndv'='3', 'num_nulls'='0', 'min_value'='large', 'max_value'='small', 'data_size'='226') + """ + + sql """ + alter table call_center modify column cc_country set stats ('row_count'='42', 'ndv'='1', 'num_nulls'='0', 'min_value'='United States', 'max_value'='United States', 'data_size'='546') + """ + + sql """ + alter table call_center modify column cc_county set stats ('row_count'='42', 'ndv'='16', 'num_nulls'='0', 'min_value'='Barrow County', 'max_value'='Williamson County', 'data_size'='627') + """ + + sql """ + alter table call_center modify column cc_mkt_class set stats ('row_count'='42', 'ndv'='36', 'num_nulls'='0', 'min_value'='A bit narrow forms matter animals. 
Consist', 'max_value'='Yesterday new men can make moreov', 'data_size'='1465') + """ + + sql """ + alter table call_center modify column cc_sq_ft set stats ('row_count'='42', 'ndv'='31', 'num_nulls'='0', 'min_value'='-1890660328', 'max_value'='2122480316', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_state set stats ('row_count'='42', 'ndv'='14', 'num_nulls'='0', 'min_value'='FL', 'max_value'='WV', 'data_size'='84') + """ + + sql """ + alter table inventory modify column inv_warehouse_sk set stats ('row_count'='783000000', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='6264000000') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_addr_sk set stats ('row_count'='143996756', 'ndv'='6015811', 'num_nulls'='2881609', 'min_value'='1', 'max_value'='6000000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_cash set stats ('row_count'='143996756', 'ndv'='1107525', 'num_nulls'='2879192', 'min_value'='0.00', 'max_value'='26955.24', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_cdemo_sk set stats ('row_count'='143996756', 'ndv'='1916366', 'num_nulls'='2881314', 'min_value'='1', 'max_value'='1920800', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_return_amt_inc_tax set stats ('row_count'='143996756', 'ndv'='1544502', 'num_nulls'='2881886', 'min_value'='0.00', 'max_value'='30418.06', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returning_addr_sk set stats ('row_count'='143996756', 'ndv'='6015811', 'num_nulls'='2883215', 'min_value'='1', 'max_value'='6000000', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_buy_potential set stats ('row_count'='7200', 'ndv'='6', 'num_nulls'='0', 'min_value'='0-500', 'max_value'='Unknown', 'data_size'='54000') + """ + 
+ sql """ + alter table customer_address modify column ca_address_id set stats ('row_count'='6000000', 'ndv'='5984931', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAABAA', 'max_value'='AAAAAAAAPPPPPEAA', 'data_size'='96000000') + """ + + sql """ + alter table customer_address modify column ca_address_sk set stats ('row_count'='6000000', 'ndv'='6015811', 'num_nulls'='0', 'min_value'='1', 'max_value'='6000000', 'data_size'='48000000') + """ + + sql """ + alter table customer_address modify column ca_country set stats ('row_count'='6000000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='75661794') + """ + + sql """ + alter table customer_address modify column ca_location_type set stats ('row_count'='6000000', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='single family', 'data_size'='52372545') + """ + + sql """ + alter table customer_address modify column ca_street_number set stats ('row_count'='6000000', 'ndv'='1002', 'num_nulls'='0', 'min_value'='', 'max_value'='999', 'data_size'='16837336') + """ + + sql """ + alter table customer_address modify column ca_suite_number set stats ('row_count'='6000000', 'ndv'='76', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite Y', 'data_size'='45911575') + """ + + sql """ + alter table catalog_page modify column cp_catalog_page_id set stats ('row_count'='30000', 'ndv'='29953', 'num_nulls'='0', 'min_value'='AAAAAAAAAAABAAAA', 'max_value'='AAAAAAAAPPPGAAAA', 'data_size'='480000') + """ + + sql """ + alter table item modify column i_rec_end_date set stats ('row_count'='300000', 'ndv'='3', 'num_nulls'='150000', 'min_value'='1999-10-27', 'max_value'='2001-10-26', 'data_size'='1200000') + """ + + sql """ + alter table web_returns modify column wr_refunded_addr_sk set stats ('row_count'='71997522', 'ndv'='6015811', 'num_nulls'='3239971', 'min_value'='1', 'max_value'='6000000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_reversed_charge set stats 
('row_count'='71997522', 'ndv'='692680', 'num_nulls'='3239546', 'min_value'='0.00', 'max_value'='23194.77', 'data_size'='287990088') + """ + + sql """ + alter table web_site modify column web_state set stats ('row_count'='54', 'ndv'='18', 'num_nulls'='0', 'min_value'='AL', 'max_value'='WV', 'data_size'='108') + """ + + sql """ + alter table promotion modify column p_end_date_sk set stats ('row_count'='1500', 'ndv'='683', 'num_nulls'='18', 'min_value'='2450113', 'max_value'='2450967', 'data_size'='12000') + """ + + sql """ + alter table web_sales modify column ws_bill_hdemo_sk set stats ('row_count'='720000376', 'ndv'='7251', 'num_nulls'='180139', 'min_value'='1', 'max_value'='7200', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ext_ship_cost set stats ('row_count'='720000376', 'ndv'='567477', 'num_nulls'='180084', 'min_value'='0.00', 'max_value'='14950.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ship_addr_sk set stats ('row_count'='720000376', 'ndv'='6015811', 'num_nulls'='179848', 'min_value'='1', 'max_value'='6000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ship_mode_sk set stats ('row_count'='720000376', 'ndv'='20', 'num_nulls'='180017', 'min_value'='1', 'max_value'='20', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_warehouse_sk set stats ('row_count'='720000376', 'ndv'='20', 'num_nulls'='180105', 'min_value'='1', 'max_value'='20', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_company_name set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6965') + """ + + sql """ + alter table store modify column s_gmt_offset set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='6', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_manager set stats 
('row_count'='1002', 'ndv'='739', 'num_nulls'='0', 'min_value'='', 'max_value'='Zane Clifton', 'data_size'='12649') + """ + + sql """ + alter table store modify column s_street_number set stats ('row_count'='1002', 'ndv'='521', 'num_nulls'='0', 'min_value'='', 'max_value'='999', 'data_size'='2874') + """ + + sql """ + alter table time_dim modify column t_meal_time set stats ('row_count'='86400', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='lunch', 'data_size'='248400') + """ + + sql """ + alter table time_dim modify column t_time set stats ('row_count'='86400', 'ndv'='86684', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='345600') + """ + + sql """ + alter table web_page modify column wp_creation_date_sk set stats ('row_count'='3000', 'ndv'='199', 'num_nulls'='33', 'min_value'='2450604', 'max_value'='2450815', 'data_size'='24000') + """ + + sql """ + alter table web_page modify column wp_customer_sk set stats ('row_count'='3000', 'ndv'='713', 'num_nulls'='2147', 'min_value'='9522', 'max_value'='11995685', 'data_size'='24000') + """ + + sql """ + alter table web_page modify column wp_max_ad_count set stats ('row_count'='3000', 'ndv'='5', 'num_nulls'='31', 'min_value'='0', 'max_value'='4', 'data_size'='12000') + """ + + sql """ + alter table web_page modify column wp_url set stats ('row_count'='3000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='http://www.foo.com', 'data_size'='53406') + """ + + sql """ + alter table store_returns modify column sr_refunded_cash set stats ('row_count'='287999764', 'ndv'='928470', 'num_nulls'='10081294', 'min_value'='0.00', 'max_value'='18173.96', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_tax set stats ('row_count'='287999764', 'ndv'='117247', 'num_nulls'='10081332', 'min_value'='0.00', 'max_value'='1682.04', 'data_size'='1151999056') + """ + + sql """ + alter table store_sales modify column ss_customer_sk set stats 
('row_count'='2879987999', 'ndv'='12157481', 'num_nulls'='129590766', 'min_value'='1', 'max_value'='12000000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_hdemo_sk set stats ('row_count'='2879987999', 'ndv'='7251', 'num_nulls'='129594559', 'min_value'='1', 'max_value'='7200', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_store_sk set stats ('row_count'='2879987999', 'ndv'='499', 'num_nulls'='129572050', 'min_value'='1', 'max_value'='1000', 'data_size'='23039903992') + """ + + sql """ + alter table ship_mode modify column sm_ship_mode_id set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPAAAAAAA', 'data_size'='320') + """ + + sql """ + alter table ship_mode modify column sm_ship_mode_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') + """ + + sql """ + alter table customer modify column c_first_name set stats ('row_count'='12000000', 'ndv'='5140', 'num_nulls'='0', 'min_value'='', 'max_value'='Zulma', 'data_size'='67593278') + """ + + sql """ + alter table customer modify column c_first_sales_date_sk set stats ('row_count'='12000000', 'ndv'='3644', 'num_nulls'='419856', 'min_value'='2448998', 'max_value'='2452648', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_first_shipto_date_sk set stats ('row_count'='12000000', 'ndv'='3644', 'num_nulls'='420769', 'min_value'='2449028', 'max_value'='2452678', 'data_size'='96000000') + """ + + sql """ + alter table customer_demographics modify column cd_dep_college_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') + """ + + sql """ + alter table date_dim modify column d_dow set stats ('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='292196') + """ + + sql """ + alter 
table date_dim modify column d_fy_quarter_seq set stats ('row_count'='73049', 'ndv'='801', 'num_nulls'='0', 'min_value'='1', 'max_value'='801', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_qoy set stats ('row_count'='73049', 'ndv'='4', 'num_nulls'='0', 'min_value'='1', 'max_value'='4', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_quarter_seq set stats ('row_count'='73049', 'ndv'='801', 'num_nulls'='0', 'min_value'='1', 'max_value'='801', 'data_size'='292196') + """ + + sql """ + alter table warehouse modify column w_street_name set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='', 'max_value'='Wilson Elm', 'data_size'='176') + """ + + sql """ + alter table warehouse modify column w_suite_number set stats ('row_count'='20', 'ndv'='18', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite X', 'data_size'='150') + """ + + sql """ + alter table catalog_sales modify column cs_bill_cdemo_sk set stats ('row_count'='1439980416', 'ndv'='1916366', 'num_nulls'='7202134', 'min_value'='1', 'max_value'='1920800', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_bill_hdemo_sk set stats ('row_count'='1439980416', 'ndv'='7251', 'num_nulls'='7198837', 'min_value'='1', 'max_value'='7200', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ext_ship_cost set stats ('row_count'='1439980416', 'ndv'='573238', 'num_nulls'='7202537', 'min_value'='0.00', 'max_value'='14994.00', 'data_size'='5759921664') + """ + + sql """ + alter table call_center modify column cc_name set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='California', 'max_value'='Pacific Northwest_2', 'data_size'='572') + """ + + sql """ + alter table call_center modify column cc_street_name set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='1st', 'max_value'='Willow', 'data_size'='356') + """ + + sql """ + alter table 
call_center modify column cc_zip set stats ('row_count'='42', 'ndv'='19', 'num_nulls'='0', 'min_value'='18605', 'max_value'='98048', 'data_size'='210') + """ + + sql """ + alter table inventory modify column inv_quantity_on_hand set stats ('row_count'='783000000', 'ndv'='1006', 'num_nulls'='39153758', 'min_value'='0', 'max_value'='1000', 'data_size'='3132000000') + """ + + sql """ + alter table catalog_returns modify column cr_catalog_page_sk set stats ('row_count'='143996756', 'ndv'='17005', 'num_nulls'='2882502', 'min_value'='1', 'max_value'='25207', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_income_band_sk set stats ('row_count'='7200', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='57600') + """ + + sql """ + alter table catalog_page modify column cp_description set stats ('row_count'='30000', 'ndv'='30141', 'num_nulls'='0', 'min_value'='', 'max_value'='Youngsters worry both workers. Fascinating characters take cheap never alive studies. 
Direct, old', 'data_size'='2215634') + """ + + sql """ + alter table item modify column i_item_id set stats ('row_count'='300000', 'ndv'='150851', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAABAAA', 'max_value'='AAAAAAAAPPPPBAAA', 'data_size'='4800000') + """ + + sql """ + alter table web_returns modify column wr_account_credit set stats ('row_count'='71997522', 'ndv'='683955', 'num_nulls'='3241972', 'min_value'='0.00', 'max_value'='23166.33', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_net_loss set stats ('row_count'='71997522', 'ndv'='815608', 'num_nulls'='3240573', 'min_value'='0.50', 'max_value'='15887.84', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_return_amt set stats ('row_count'='71997522', 'ndv'='808311', 'num_nulls'='3238405', 'min_value'='0.00', 'max_value'='29191.00', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_return_amt_inc_tax set stats ('row_count'='71997522', 'ndv'='1359913', 'num_nulls'='3239765', 'min_value'='0.00', 'max_value'='30393.01', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_return_quantity set stats ('row_count'='71997522', 'ndv'='100', 'num_nulls'='3238643', 'min_value'='1', 'max_value'='100', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_returning_addr_sk set stats ('row_count'='71997522', 'ndv'='6015811', 'num_nulls'='3239658', 'min_value'='1', 'max_value'='6000000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_returning_customer_sk set stats ('row_count'='71997522', 'ndv'='12119220', 'num_nulls'='3237281', 'min_value'='1', 'max_value'='12000000', 'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_mkt_desc set stats ('row_count'='54', 'ndv'='38', 'num_nulls'='0', 'min_value'='Acres see else children. Mutual too', 'max_value'='Windows increase to a differences. 
Other parties might in', 'data_size'='3473') + """ + + sql """ + alter table web_site modify column web_mkt_id set stats ('row_count'='54', 'ndv'='6', 'num_nulls'='1', 'min_value'='1', 'max_value'='6', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_rec_end_date set stats ('row_count'='54', 'ndv'='3', 'num_nulls'='27', 'min_value'='1999-08-16', 'max_value'='2001-08-15', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_site_id set stats ('row_count'='54', 'ndv'='27', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPBAAAAAA', 'data_size'='864') + """ + + sql """ + alter table web_site modify column web_street_type set stats ('row_count'='54', 'ndv'='20', 'num_nulls'='0', 'min_value'='Ave', 'max_value'='Wy', 'data_size'='208') + """ + + sql """ + alter table promotion modify column p_channel_demo set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1479') + """ + + sql """ + alter table promotion modify column p_channel_details set stats ('row_count'='1500', 'ndv'='1490', 'num_nulls'='0', 'min_value'='', 'max_value'='Young, valuable companies watch walls. 
Payments can flour', 'data_size'='59126') + """ + + sql """ + alter table promotion modify column p_channel_event set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1482') + """ + + sql """ + alter table promotion modify column p_discount_active set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1473') + """ + + sql """ + alter table promotion modify column p_promo_sk set stats ('row_count'='1500', 'ndv'='1489', 'num_nulls'='0', 'min_value'='1', 'max_value'='1500', 'data_size'='12000') + """ + + sql """ + alter table promotion modify column p_purpose set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='10374') + """ + + sql """ + alter table web_sales modify column ws_bill_cdemo_sk set stats ('row_count'='720000376', 'ndv'='1916366', 'num_nulls'='179788', 'min_value'='1', 'max_value'='1920800', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_sold_date_sk set stats ('row_count'='720000376', 'ndv'='1820', 'num_nulls'='179921', 'min_value'='2450816', 'max_value'='2452642', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_web_site_sk set stats ('row_count'='720000376', 'ndv'='54', 'num_nulls'='179930', 'min_value'='1', 'max_value'='54', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_city set stats ('row_count'='1002', 'ndv'='55', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodlawn', 'data_size'='9238') + """ + + sql """ + alter table store modify column s_company_id set stats ('row_count'='1002', 'ndv'='1', 'num_nulls'='7', 'min_value'='1', 'max_value'='1', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_county set stats ('row_count'='1002', 'ndv'='28', 'num_nulls'='0', 'min_value'='', 'max_value'='Ziebach County', 'data_size'='14291') + """ + + sql """ + alter table store 
modify column s_geography_class set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6972') + """ + + sql """ + alter table store modify column s_hours set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='8AM-8AM', 'data_size'='7088') + """ + + sql """ + alter table store modify column s_store_id set stats ('row_count'='1002', 'ndv'='501', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPBAAAAA', 'data_size'='16032') + """ + + sql """ + alter table store modify column s_zip set stats ('row_count'='1002', 'ndv'='354', 'num_nulls'='0', 'min_value'='', 'max_value'='99454', 'data_size'='4975') + """ + + sql """ + alter table time_dim modify column t_am_pm set stats ('row_count'='86400', 'ndv'='2', 'num_nulls'='0', 'min_value'='AM', 'max_value'='PM', 'data_size'='172800') + """ + + sql """ + alter table time_dim modify column t_minute set stats ('row_count'='86400', 'ndv'='60', 'num_nulls'='0', 'min_value'='0', 'max_value'='59', 'data_size'='345600') + """ + + sql """ + alter table web_page modify column wp_web_page_id set stats ('row_count'='3000', 'ndv'='1501', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPKAAAAA', 'data_size'='48000') + """ + + sql """ + alter table web_page modify column wp_web_page_sk set stats ('row_count'='3000', 'ndv'='2984', 'num_nulls'='0', 'min_value'='1', 'max_value'='3000', 'data_size'='24000') + """ + + sql """ + alter table store_returns modify column sr_return_amt set stats ('row_count'='287999764', 'ndv'='671228', 'num_nulls'='10080055', 'min_value'='0.00', 'max_value'='19434.00', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_returned_date_sk set stats ('row_count'='287999764', 'ndv'='2010', 'num_nulls'='10079607', 'min_value'='2450820', 'max_value'='2452822', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_ext_tax 
set stats ('row_count'='2879987999', 'ndv'='149597', 'num_nulls'='129588732', 'min_value'='0.00', 'max_value'='1797.48', 'data_size'='11519951996') + """ + + sql """ + alter table customer modify column c_current_cdemo_sk set stats ('row_count'='12000000', 'ndv'='1913901', 'num_nulls'='419895', 'min_value'='1', 'max_value'='1920800', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_customer_id set stats ('row_count'='12000000', 'ndv'='11921032', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAABAA', 'max_value'='AAAAAAAAPPPPPKAA', 'data_size'='192000000') + """ + + sql """ + alter table date_dim modify column d_current_day set stats ('row_count'='73049', 'ndv'='1', 'num_nulls'='0', 'min_value'='N', 'max_value'='N', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_current_month set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_date set stats ('row_count'='73049', 'ndv'='73250', 'num_nulls'='0', 'min_value'='1900-01-02', 'max_value'='2100-01-01', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_moy set stats ('row_count'='73049', 'ndv'='12', 'num_nulls'='0', 'min_value'='1', 'max_value'='12', 'data_size'='292196') + """ + + sql """ + alter table warehouse modify column w_gmt_offset set stats ('row_count'='20', 'ndv'='3', 'num_nulls'='1', 'min_value'='-7.00', 'max_value'='-5.00', 'data_size'='80') + """ + + sql """ + alter table warehouse modify column w_warehouse_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') + """ + + sql """ + alter table warehouse modify column w_warehouse_sq_ft set stats ('row_count'='20', 'ndv'='19', 'num_nulls'='1', 'min_value'='73065', 'max_value'='977787', 'data_size'='80') + """ + + sql """ + alter table catalog_sales modify column cs_ext_sales_price set stats 
('row_count'='1439980416', 'ndv'='1100662', 'num_nulls'='7199625', 'min_value'='0.00', 'max_value'='29943.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ext_wholesale_cost set stats ('row_count'='1439980416', 'ndv'='393180', 'num_nulls'='7199876', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_item_sk set stats ('row_count'='1439980416', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid_inc_tax set stats ('row_count'='1439980416', 'ndv'='2422238', 'num_nulls'='7200702', 'min_value'='0.00', 'max_value'='32376.27', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ship_date_sk set stats ('row_count'='1439980416', 'ndv'='1933', 'num_nulls'='7200707', 'min_value'='2450817', 'max_value'='2452744', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_warehouse_sk set stats ('row_count'='1439980416', 'ndv'='20', 'num_nulls'='7200688', 'min_value'='1', 'max_value'='20', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_division set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_division_name set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='able', 'max_value'='pri', 'data_size'='164') + """ + + sql """ + alter table call_center modify column cc_manager set stats ('row_count'='42', 'ndv'='28', 'num_nulls'='0', 'min_value'='Alden Snyder', 'max_value'='Wayne Ray', 'data_size'='519') + """ + + sql """ + alter table call_center modify column cc_rec_start_date set stats ('row_count'='42', 'ndv'='4', 'num_nulls'='0', 'min_value'='1998-01-01', 'max_value'='2002-01-01', 
'data_size'='168') + """ + + sql """ + alter table catalog_returns modify column cr_call_center_sk set stats ('row_count'='143996756', 'ndv'='42', 'num_nulls'='2881668', 'min_value'='1', 'max_value'='42', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_net_loss set stats ('row_count'='143996756', 'ndv'='911034', 'num_nulls'='2881704', 'min_value'='0.50', 'max_value'='16095.08', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_customer_sk set stats ('row_count'='143996756', 'ndv'='12156363', 'num_nulls'='2879017', 'min_value'='1', 'max_value'='12000000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_hdemo_sk set stats ('row_count'='143996756', 'ndv'='7251', 'num_nulls'='2882107', 'min_value'='1', 'max_value'='7200', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_returning_customer_sk set stats ('row_count'='143996756', 'ndv'='12157481', 'num_nulls'='2879023', 'min_value'='1', 'max_value'='12000000', 'data_size'='1151974048') + """ + + sql """ + alter table customer_address modify column ca_gmt_offset set stats ('row_count'='6000000', 'ndv'='6', 'num_nulls'='180219', 'min_value'='-10.00', 'max_value'='-5.00', 'data_size'='24000000') + """ + + sql """ + alter table item modify column i_color set stats ('row_count'='300000', 'ndv'='93', 'num_nulls'='0', 'min_value'='', 'max_value'='yellow', 'data_size'='1610293') + """ + + sql """ + alter table item modify column i_manufact set stats ('row_count'='300000', 'ndv'='1004', 'num_nulls'='0', 'min_value'='', 'max_value'='pripripri', 'data_size'='3379693') + """ + + sql """ + alter table item modify column i_product_name set stats ('row_count'='300000', 'ndv'='294994', 'num_nulls'='0', 'min_value'='', 'max_value'='pripripripripriought', 'data_size'='6849199') + """ + + sql """ + alter table web_returns modify column wr_returned_time_sk 
set stats ('row_count'='71997522', 'ndv'='87677', 'num_nulls'='3238574', 'min_value'='0', 'max_value'='86399', 'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_manager set stats ('row_count'='54', 'ndv'='40', 'num_nulls'='0', 'min_value'='', 'max_value'='William Young', 'data_size'='658') + """ + + sql """ + alter table web_site modify column web_mkt_class set stats ('row_count'='54', 'ndv'='40', 'num_nulls'='0', 'min_value'='', 'max_value'='Written, political plans show to the models. T', 'data_size'='1822') + """ + + sql """ + alter table web_site modify column web_rec_start_date set stats ('row_count'='54', 'ndv'='4', 'num_nulls'='2', 'min_value'='1997-08-16', 'max_value'='2001-08-16', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_street_number set stats ('row_count'='54', 'ndv'='36', 'num_nulls'='0', 'min_value'='', 'max_value'='983', 'data_size'='154') + """ + + sql """ + alter table promotion modify column p_channel_catalog set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1482') + """ + + sql """ + alter table promotion modify column p_promo_id set stats ('row_count'='1500', 'ndv'='1519', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPEAAAAA', 'data_size'='24000') + """ + + sql """ + alter table web_sales modify column ws_bill_customer_sk set stats ('row_count'='720000376', 'ndv'='12103729', 'num_nulls'='179817', 'min_value'='1', 'max_value'='12000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_list_price set stats ('row_count'='720000376', 'ndv'='29396', 'num_nulls'='180053', 'min_value'='1.00', 'max_value'='300.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_sales_price set stats ('row_count'='720000376', 'ndv'='29288', 'num_nulls'='180005', 'min_value'='0.00', 'max_value'='300.00', 'data_size'='2880001504') + """ + + sql """ + 
alter table web_sales modify column ws_ship_hdemo_sk set stats ('row_count'='720000376', 'ndv'='7251', 'num_nulls'='179824', 'min_value'='1', 'max_value'='7200', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_closed_date_sk set stats ('row_count'='1002', 'ndv'='163', 'num_nulls'='729', 'min_value'='2450820', 'max_value'='2451313', 'data_size'='8016') + """ + + sql """ + alter table store modify column s_division_id set stats ('row_count'='1002', 'ndv'='1', 'num_nulls'='6', 'min_value'='1', 'max_value'='1', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_market_desc set stats ('row_count'='1002', 'ndv'='765', 'num_nulls'='0', 'min_value'='', 'max_value'='Yesterday left factors handle continuing co', 'data_size'='57638') + """ + + sql """ + alter table store modify column s_market_id set stats ('row_count'='1002', 'ndv'='10', 'num_nulls'='8', 'min_value'='1', 'max_value'='10', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_state set stats ('row_count'='1002', 'ndv'='22', 'num_nulls'='0', 'min_value'='', 'max_value'='WV', 'data_size'='1994') + """ + + sql """ + alter table store modify column s_store_sk set stats ('row_count'='1002', 'ndv'='988', 'num_nulls'='0', 'min_value'='1', 'max_value'='1002', 'data_size'='8016') + """ + + sql """ + alter table store modify column s_street_name set stats ('row_count'='1002', 'ndv'='549', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodland Oak', 'data_size'='8580') + """ + + sql """ + alter table web_page modify column wp_access_date_sk set stats ('row_count'='3000', 'ndv'='101', 'num_nulls'='31', 'min_value'='2452548', 'max_value'='2452648', 'data_size'='24000') + """ + + sql """ + alter table web_page modify column wp_char_count set stats ('row_count'='3000', 'ndv'='1883', 'num_nulls'='42', 'min_value'='303', 'max_value'='8523', 'data_size'='12000') + """ + + sql """ + alter table store_returns modify column sr_addr_sk set stats 
('row_count'='287999764', 'ndv'='6015811', 'num_nulls'='10082311', 'min_value'='1', 'max_value'='6000000', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_return_time_sk set stats ('row_count'='287999764', 'ndv'='32660', 'num_nulls'='10082805', 'min_value'='28799', 'max_value'='61199', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_store_sk set stats ('row_count'='287999764', 'ndv'='499', 'num_nulls'='10081871', 'min_value'='1', 'max_value'='1000', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_coupon_amt set stats ('row_count'='2879987999', 'ndv'='1161208', 'num_nulls'='129609101', 'min_value'='0.00', 'max_value'='19778.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_sales_price set stats ('row_count'='2879987999', 'ndv'='19780', 'num_nulls'='129598061', 'min_value'='0.00', 'max_value'='200.00', 'data_size'='11519951996') + """ + + sql """ + alter table customer modify column c_birth_country set stats ('row_count'='12000000', 'ndv'='211', 'num_nulls'='0', 'min_value'='', 'max_value'='ZIMBABWE', 'data_size'='100750845') + """ + + sql """ + alter table customer modify column c_birth_month set stats ('row_count'='12000000', 'ndv'='12', 'num_nulls'='419629', 'min_value'='1', 'max_value'='12', 'data_size'='48000000') + """ + + sql """ + alter table customer modify column c_customer_sk set stats ('row_count'='12000000', 'ndv'='12157481', 'num_nulls'='0', 'min_value'='1', 'max_value'='12000000', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_email_address set stats ('row_count'='12000000', 'ndv'='11642077', 'num_nulls'='0', 'min_value'='', 'max_value'='Zulma.Young@aDhzZzCzYN.edu', 'data_size'='318077849') + """ + + sql """ + alter table customer modify column c_last_review_date_sk set stats ('row_count'='12000000', 'ndv'='366', 'num_nulls'='419900', 'min_value'='2452283', 
'max_value'='2452648', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_preferred_cust_flag set stats ('row_count'='12000000', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='11580510') + """ + + sql """ + alter table dbgen_version modify column dv_version set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='3.2.0', 'max_value'='3.2.0', 'data_size'='5') + """ + + sql """ + alter table customer_demographics modify column cd_purchase_estimate set stats ('row_count'='1920800', 'ndv'='20', 'num_nulls'='0', 'min_value'='500', 'max_value'='10000', 'data_size'='7683200') + """ + + sql """ + alter table reason modify column r_reason_id set stats ('row_count'='65', 'ndv'='65', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPDAAAAAA', 'data_size'='1040') + """ + + sql """ + alter table reason modify column r_reason_sk set stats ('row_count'='65', 'ndv'='65', 'num_nulls'='0', 'min_value'='1', 'max_value'='65', 'data_size'='520') + """ + + sql """ + alter table date_dim modify column d_current_week set stats ('row_count'='73049', 'ndv'='1', 'num_nulls'='0', 'min_value'='N', 'max_value'='N', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_first_dom set stats ('row_count'='73049', 'ndv'='2410', 'num_nulls'='0', 'min_value'='2415021', 'max_value'='2488070', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_fy_year set stats ('row_count'='73049', 'ndv'='202', 'num_nulls'='0', 'min_value'='1900', 'max_value'='2100', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_last_dom set stats ('row_count'='73049', 'ndv'='2419', 'num_nulls'='0', 'min_value'='2415020', 'max_value'='2488372', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_month_seq set stats ('row_count'='73049', 'ndv'='2398', 'num_nulls'='0', 'min_value'='0', 'max_value'='2400', 'data_size'='292196') + """ + + 
sql """ + alter table date_dim modify column d_quarter_name set stats ('row_count'='73049', 'ndv'='799', 'num_nulls'='0', 'min_value'='1900Q1', 'max_value'='2100Q1', 'data_size'='438294') + """ + + sql """ + alter table warehouse modify column w_county set stats ('row_count'='20', 'ndv'='14', 'num_nulls'='0', 'min_value'='Bronx County', 'max_value'='Ziebach County', 'data_size'='291') + """ + + sql """ + alter table warehouse modify column w_street_number set stats ('row_count'='20', 'ndv'='19', 'num_nulls'='0', 'min_value'='', 'max_value'='957', 'data_size'='54') + """ + + sql """ + alter table warehouse modify column w_warehouse_name set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='', 'max_value'='Therefore urg', 'data_size'='307') + """ + + sql """ + alter table catalog_sales modify column cs_ext_discount_amt set stats ('row_count'='1439980416', 'ndv'='1100115', 'num_nulls'='7201054', 'min_value'='0.00', 'max_value'='29982.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid_inc_ship_tax set stats ('row_count'='1439980416', 'ndv'='3312360', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='46593.36', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_promo_sk set stats ('row_count'='1439980416', 'ndv'='1489', 'num_nulls'='7202844', 'min_value'='1', 'max_value'='1500', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_call_center_id set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPBAAAAAA', 'data_size'='672') + """ + + sql """ + alter table call_center modify column cc_employees set stats ('row_count'='42', 'ndv'='30', 'num_nulls'='0', 'min_value'='69020', 'max_value'='6879074', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_suite_number set stats ('row_count'='42', 'ndv'='18', 'num_nulls'='0', 'min_value'='Suite 0', 
'max_value'='Suite W', 'data_size'='326') + """ + + sql """ + alter table catalog_returns modify column cr_item_sk set stats ('row_count'='143996756', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_reason_sk set stats ('row_count'='143996756', 'ndv'='65', 'num_nulls'='2881950', 'min_value'='1', 'max_value'='65', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_return_ship_cost set stats ('row_count'='143996756', 'ndv'='483467', 'num_nulls'='2883436', 'min_value'='0.00', 'max_value'='14273.28', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_ship_mode_sk set stats ('row_count'='143996756', 'ndv'='20', 'num_nulls'='2879879', 'min_value'='1', 'max_value'='20', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_store_credit set stats ('row_count'='143996756', 'ndv'='802237', 'num_nulls'='2880469', 'min_value'='0.00', 'max_value'='23215.15', 'data_size'='575987024') + """ + + sql """ + alter table customer_address modify column ca_city set stats ('row_count'='6000000', 'ndv'='977', 'num_nulls'='0', 'min_value'='', 'max_value'='Zion', 'data_size'='52096290') + """ + + sql """ + alter table customer_address modify column ca_state set stats ('row_count'='6000000', 'ndv'='52', 'num_nulls'='0', 'min_value'='', 'max_value'='WY', 'data_size'='11640128') + """ + + sql """ + alter table customer_address modify column ca_street_name set stats ('row_count'='6000000', 'ndv'='8173', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodland Woodland', 'data_size'='50697257') + """ + + sql """ + alter table customer_address modify column ca_street_type set stats ('row_count'='6000000', 'ndv'='21', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='24441630') + """ + + sql """ + alter table catalog_page modify column cp_catalog_number set 
stats ('row_count'='30000', 'ndv'='109', 'num_nulls'='297', 'min_value'='1', 'max_value'='109', 'data_size'='120000') + """ + + sql """ + alter table catalog_page modify column cp_catalog_page_number set stats ('row_count'='30000', 'ndv'='279', 'num_nulls'='294', 'min_value'='1', 'max_value'='277', 'data_size'='120000') + """ + + sql """ + alter table catalog_page modify column cp_catalog_page_sk set stats ('row_count'='30000', 'ndv'='30439', 'num_nulls'='0', 'min_value'='1', 'max_value'='30000', 'data_size'='240000') + """ + + sql """ + alter table catalog_page modify column cp_start_date_sk set stats ('row_count'='30000', 'ndv'='91', 'num_nulls'='286', 'min_value'='2450815', 'max_value'='2453005', 'data_size'='120000') + """ + + sql """ + alter table item modify column i_rec_start_date set stats ('row_count'='300000', 'ndv'='4', 'num_nulls'='784', 'min_value'='1997-10-27', 'max_value'='2001-10-27', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_units set stats ('row_count'='300000', 'ndv'='22', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='1253652') + """ + + sql """ + alter table web_returns modify column wr_refunded_hdemo_sk set stats ('row_count'='71997522', 'ndv'='7251', 'num_nulls'='3238545', 'min_value'='1', 'max_value'='7200', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_return_ship_cost set stats ('row_count'='71997522', 'ndv'='451263', 'num_nulls'='3239048', 'min_value'='0.00', 'max_value'='14352.10', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_returned_date_sk set stats ('row_count'='71997522', 'ndv'='2188', 'num_nulls'='3239259', 'min_value'='2450819', 'max_value'='2453002', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_returning_cdemo_sk set stats ('row_count'='71997522', 'ndv'='1916366', 'num_nulls'='3239192', 'min_value'='1', 'max_value'='1920800', 'data_size'='575980176') + 
""" + + sql """ + alter table web_site modify column web_suite_number set stats ('row_count'='54', 'ndv'='38', 'num_nulls'='0', 'min_value'='Suite 100', 'max_value'='Suite Y', 'data_size'='430') + """ + + sql """ + alter table promotion modify column p_start_date_sk set stats ('row_count'='1500', 'ndv'='685', 'num_nulls'='23', 'min_value'='2450096', 'max_value'='2450915', 'data_size'='12000') + """ + + sql """ + alter table web_sales modify column ws_coupon_amt set stats ('row_count'='720000376', 'ndv'='1505315', 'num_nulls'='179933', 'min_value'='0.00', 'max_value'='28824.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ext_wholesale_cost set stats ('row_count'='720000376', 'ndv'='393180', 'num_nulls'='180060', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_paid_inc_ship set stats ('row_count'='720000376', 'ndv'='2414838', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='44263.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ship_date_sk set stats ('row_count'='720000376', 'ndv'='1952', 'num_nulls'='180011', 'min_value'='2450817', 'max_value'='2452762', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_web_page_sk set stats ('row_count'='720000376', 'ndv'='2984', 'num_nulls'='179732', 'min_value'='1', 'max_value'='3000', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_country set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='12961') + """ + + sql """ + alter table store modify column s_store_name set stats ('row_count'='1002', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='3916') + """ + + sql """ + alter table time_dim modify column t_second set stats ('row_count'='86400', 'ndv'='60', 'num_nulls'='0', 'min_value'='0', 'max_value'='59', 
'data_size'='345600') + """ + + sql """ + alter table time_dim modify column t_sub_shift set stats ('row_count'='86400', 'ndv'='4', 'num_nulls'='0', 'min_value'='afternoon', 'max_value'='night', 'data_size'='597600') + """ + + sql """ + alter table web_page modify column wp_image_count set stats ('row_count'='3000', 'ndv'='7', 'num_nulls'='26', 'min_value'='1', 'max_value'='7', 'data_size'='12000') + """ + + sql """ + alter table web_page modify column wp_type set stats ('row_count'='3000', 'ndv'='8', 'num_nulls'='0', 'min_value'='', 'max_value'='welcome', 'data_size'='18867') + """ + + sql """ + alter table store_returns modify column sr_customer_sk set stats ('row_count'='287999764', 'ndv'='12157481', 'num_nulls'='10081624', 'min_value'='1', 'max_value'='12000000', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_hdemo_sk set stats ('row_count'='287999764', 'ndv'='7251', 'num_nulls'='10083275', 'min_value'='1', 'max_value'='7200', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_addr_sk set stats ('row_count'='2879987999', 'ndv'='6015811', 'num_nulls'='129589799', 'min_value'='1', 'max_value'='6000000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_item_sk set stats ('row_count'='2879987999', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_quantity set stats ('row_count'='2879987999', 'ndv'='100', 'num_nulls'='129584258', 'min_value'='1', 'max_value'='100', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_ticket_number set stats ('row_count'='2879987999', 'ndv'='238830448', 'num_nulls'='0', 'min_value'='1', 'max_value'='240000000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_wholesale_cost set stats ('row_count'='2879987999', 'ndv'='9905', 
'num_nulls'='129590273', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='11519951996') + """ + + sql """ + alter table ship_mode modify column sm_type set stats ('row_count'='20', 'ndv'='6', 'num_nulls'='0', 'min_value'='EXPRESS', 'max_value'='TWO DAY', 'data_size'='150') + """ + + sql """ + alter table customer modify column c_current_addr_sk set stats ('row_count'='12000000', 'ndv'='5243359', 'num_nulls'='0', 'min_value'='3', 'max_value'='6000000', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_last_name set stats ('row_count'='12000000', 'ndv'='4990', 'num_nulls'='0', 'min_value'='', 'max_value'='Zuniga', 'data_size'='70991730') + """ + + sql """ + alter table dbgen_version modify column dv_cmdline_args set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='-SCALE 1000 -PARALLEL 64 -CHILD 1 -TERMINATE N -DIR /mnt/datadisk0/tpcds1t/tpcds-data', 'max_value'='-SCALE 1000 -PARALLEL 64 -CHILD 1 -TERMINATE N -DIR /mnt/datadisk0/tpcds1t/tpcds-data', 'data_size'='86') + """ + + sql """ + alter table date_dim modify column d_current_quarter set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_date_sk set stats ('row_count'='73049', 'ndv'='73042', 'num_nulls'='0', 'min_value'='2415022', 'max_value'='2488070', 'data_size'='584392') + """ + + sql """ + alter table date_dim modify column d_holiday set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table warehouse modify column w_country set stats ('row_count'='20', 'ndv'='1', 'num_nulls'='0', 'min_value'='United States', 'max_value'='United States', 'data_size'='260') + """ + + sql """ + alter table warehouse modify column w_state set stats ('row_count'='20', 'ndv'='13', 'num_nulls'='0', 'min_value'='AL', 'max_value'='TN', 'data_size'='40') + """ + + sql """ + alter 
table catalog_sales modify column cs_bill_addr_sk set stats ('row_count'='1439980416', 'ndv'='6015811', 'num_nulls'='7199539', 'min_value'='1', 'max_value'='6000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_bill_customer_sk set stats ('row_count'='1439980416', 'ndv'='12157481', 'num_nulls'='7201919', 'min_value'='1', 'max_value'='12000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid set stats ('row_count'='1439980416', 'ndv'='1809875', 'num_nulls'='7197668', 'min_value'='0.00', 'max_value'='29943.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ship_addr_sk set stats ('row_count'='1439980416', 'ndv'='6015811', 'num_nulls'='7198232', 'min_value'='1', 'max_value'='6000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ship_mode_sk set stats ('row_count'='1439980416', 'ndv'='20', 'num_nulls'='7201083', 'min_value'='1', 'max_value'='20', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_sold_date_sk set stats ('row_count'='1439980416', 'ndv'='1835', 'num_nulls'='7203326', 'min_value'='2450815', 'max_value'='2452654', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_sold_time_sk set stats ('row_count'='1439980416', 'ndv'='87677', 'num_nulls'='7201329', 'min_value'='0', 'max_value'='86399', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_wholesale_cost set stats ('row_count'='1439980416', 'ndv'='9905', 'num_nulls'='7201098', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='5759921664') + """ + + sql """ + alter table call_center modify column cc_company_name set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='able', 'max_value'='pri', 'data_size'='160') + """ + + sql """ + alter table call_center modify column cc_market_manager set 
stats ('row_count'='42', 'ndv'='35', 'num_nulls'='0', 'min_value'='Cesar Allen', 'max_value'='William Larsen', 'data_size'='524') + """ + + sql """ + alter table call_center modify column cc_mkt_id set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_street_type set stats ('row_count'='42', 'ndv'='11', 'num_nulls'='0', 'min_value'='Avenue', 'max_value'='Way', 'data_size'='184') + """ + + sql """ + alter table catalog_returns modify column cr_return_tax set stats ('row_count'='143996756', 'ndv'='149828', 'num_nulls'='2881611', 'min_value'='0.00', 'max_value'='2511.58', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returning_cdemo_sk set stats ('row_count'='143996756', 'ndv'='1916366', 'num_nulls'='2880543', 'min_value'='1', 'max_value'='1920800', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_returning_hdemo_sk set stats ('row_count'='143996756', 'ndv'='7251', 'num_nulls'='2882692', 'min_value'='1', 'max_value'='7200', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_reversed_charge set stats ('row_count'='143996756', 'ndv'='802509', 'num_nulls'='2881215', 'min_value'='0.00', 'max_value'='24033.84', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_warehouse_sk set stats ('row_count'='143996756', 'ndv'='20', 'num_nulls'='2882192', 'min_value'='1', 'max_value'='20', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_demo_sk set stats ('row_count'='7200', 'ndv'='7251', 'num_nulls'='0', 'min_value'='1', 'max_value'='7200', 'data_size'='57600') + """ + + sql """ + alter table household_demographics modify column hd_vehicle_count set stats ('row_count'='7200', 'ndv'='6', 'num_nulls'='0', 'min_value'='-1', 'max_value'='4', 'data_size'='28800') 
+ """ + + sql """ + alter table customer_address modify column ca_zip set stats ('row_count'='6000000', 'ndv'='9253', 'num_nulls'='0', 'min_value'='', 'max_value'='99981', 'data_size'='29097610') + """ + + sql """ + alter table income_band modify column ib_income_band_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') + """ + + sql """ + alter table catalog_page modify column cp_type set stats ('row_count'='30000', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='quarterly', 'data_size'='227890') + """ + + sql """ + alter table item modify column i_brand set stats ('row_count'='300000', 'ndv'='714', 'num_nulls'='0', 'min_value'='', 'max_value'='univunivamalg #9', 'data_size'='4834917') + """ + + sql """ + alter table item modify column i_formulation set stats ('row_count'='300000', 'ndv'='224757', 'num_nulls'='0', 'min_value'='', 'max_value'='yellow98911509228741', 'data_size'='5984460') + """ + + sql """ + alter table item modify column i_item_desc set stats ('row_count'='300000', 'ndv'='217721', 'num_nulls'='0', 'min_value'='', 'max_value'='Youngsters used to save quite colour', 'data_size'='30093342') + """ + + sql """ + alter table web_returns modify column wr_fee set stats ('row_count'='71997522', 'ndv'='9958', 'num_nulls'='3238926', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_item_sk set stats ('row_count'='71997522', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_reason_sk set stats ('row_count'='71997522', 'ndv'='65', 'num_nulls'='3238897', 'min_value'='1', 'max_value'='65', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_refunded_customer_sk set stats ('row_count'='71997522', 'ndv'='12117831', 'num_nulls'='3242433', 'min_value'='1', 'max_value'='12000000', 
'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_city set stats ('row_count'='54', 'ndv'='31', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodlawn', 'data_size'='491') + """ + + sql """ + alter table web_site modify column web_close_date_sk set stats ('row_count'='54', 'ndv'='18', 'num_nulls'='10', 'min_value'='2441265', 'max_value'='2446218', 'data_size'='432') + """ + + sql """ + alter table web_site modify column web_company_id set stats ('row_count'='54', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_company_name set stats ('row_count'='54', 'ndv'='7', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='203') + """ + + sql """ + alter table web_site modify column web_county set stats ('row_count'='54', 'ndv'='25', 'num_nulls'='0', 'min_value'='', 'max_value'='Williamson County', 'data_size'='762') + """ + + sql """ + alter table web_site modify column web_name set stats ('row_count'='54', 'ndv'='10', 'num_nulls'='0', 'min_value'='', 'max_value'='site_8', 'data_size'='312') + """ + + sql """ + alter table web_site modify column web_open_date_sk set stats ('row_count'='54', 'ndv'='27', 'num_nulls'='1', 'min_value'='2450373', 'max_value'='2450807', 'data_size'='432') + """ + + sql """ + alter table promotion modify column p_channel_dmail set stats ('row_count'='1500', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='1483') + """ + + sql """ + alter table promotion modify column p_channel_press set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1481') + """ + + sql """ + alter table promotion modify column p_channel_radio set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1479') + """ + + sql """ + alter table promotion modify column p_cost set stats ('row_count'='1500', 'ndv'='1', 
'num_nulls'='18', 'min_value'='1000.00', 'max_value'='1000.00', 'data_size'='12000') + """ + + sql """ + alter table web_sales modify column ws_ext_tax set stats ('row_count'='720000376', 'ndv'='211413', 'num_nulls'='179695', 'min_value'='0.00', 'max_value'='2682.90', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_item_sk set stats ('row_count'='720000376', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_net_paid set stats ('row_count'='720000376', 'ndv'='1749360', 'num_nulls'='179970', 'min_value'='0.00', 'max_value'='29810.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_paid_inc_ship_tax set stats ('row_count'='720000376', 'ndv'='3224829', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='46004.19', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_paid_inc_tax set stats ('row_count'='720000376', 'ndv'='2354996', 'num_nulls'='179972', 'min_value'='0.00', 'max_value'='32492.90', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_order_number set stats ('row_count'='720000376', 'ndv'='60401176', 'num_nulls'='0', 'min_value'='1', 'max_value'='60000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_quantity set stats ('row_count'='720000376', 'ndv'='100', 'num_nulls'='179781', 'min_value'='1', 'max_value'='100', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ship_cdemo_sk set stats ('row_count'='720000376', 'ndv'='1916366', 'num_nulls'='180290', 'min_value'='1', 'max_value'='1920800', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_sold_time_sk set stats ('row_count'='720000376', 'ndv'='87677', 'num_nulls'='179980', 'min_value'='0', 'max_value'='86399', 'data_size'='5760003008') + """ + + sql 
""" + alter table store modify column s_street_type set stats ('row_count'='1002', 'ndv'='21', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='4189') + """ + + sql """ + alter table web_page modify column wp_autogen_flag set stats ('row_count'='3000', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='2962') + """ + + sql """ + alter table web_page modify column wp_rec_start_date set stats ('row_count'='3000', 'ndv'='4', 'num_nulls'='29', 'min_value'='1997-09-03', 'max_value'='2001-09-03', 'data_size'='12000') + """ + + sql """ + alter table store_returns modify column sr_net_loss set stats ('row_count'='287999764', 'ndv'='714210', 'num_nulls'='10080716', 'min_value'='0.50', 'max_value'='10776.08', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_amt_inc_tax set stats ('row_count'='287999764', 'ndv'='1259368', 'num_nulls'='10076879', 'min_value'='0.00', 'max_value'='20454.63', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_quantity set stats ('row_count'='287999764', 'ndv'='100', 'num_nulls'='10082815', 'min_value'='1', 'max_value'='100', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_ship_cost set stats ('row_count'='287999764', 'ndv'='355844', 'num_nulls'='10081927', 'min_value'='0.00', 'max_value'='9767.34', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_reversed_charge set stats ('row_count'='287999764', 'ndv'='700618', 'num_nulls'='10085976', 'min_value'='0.00', 'max_value'='17339.42', 'data_size'='1151999056') + """ + + sql """ + alter table store_sales modify column ss_net_paid_inc_tax set stats ('row_count'='2879987999', 'ndv'='1681767', 'num_nulls'='129609050', 'min_value'='0.00', 'max_value'='21769.48', 'data_size'='11519951996') + """ + + sql """ + alter table customer modify column c_birth_day set stats ('row_count'='12000000', 
'ndv'='31', 'num_nulls'='420361', 'min_value'='1', 'max_value'='31', 'data_size'='48000000') + """ + + sql """ + alter table customer_demographics modify column cd_credit_rating set stats ('row_count'='1920800', 'ndv'='4', 'num_nulls'='0', 'min_value'='Good', 'max_value'='Unknown', 'data_size'='13445600') + """ + + sql """ + alter table customer_demographics modify column cd_demo_sk set stats ('row_count'='1920800', 'ndv'='1916366', 'num_nulls'='0', 'min_value'='1', 'max_value'='1920800', 'data_size'='15366400') + """ + + sql """ + alter table customer_demographics modify column cd_dep_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') + """ + + sql """ + alter table customer_demographics modify column cd_education_status set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='2 yr Degree', 'max_value'='Unknown', 'data_size'='18384800') + """ + + sql """ + alter table customer_demographics modify column cd_gender set stats ('row_count'='1920800', 'ndv'='2', 'num_nulls'='0', 'min_value'='F', 'max_value'='M', 'data_size'='1920800') + """ + + sql """ + alter table customer_demographics modify column cd_marital_status set stats ('row_count'='1920800', 'ndv'='5', 'num_nulls'='0', 'min_value'='D', 'max_value'='W', 'data_size'='1920800') + """ + + sql """ + alter table date_dim modify column d_date_id set stats ('row_count'='73049', 'ndv'='72907', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAFCAA', 'max_value'='AAAAAAAAPPPPECAA', 'data_size'='1168784') + """ + + sql """ + alter table date_dim modify column d_fy_week_seq set stats ('row_count'='73049', 'ndv'='10448', 'num_nulls'='0', 'min_value'='1', 'max_value'='10436', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_year set stats ('row_count'='73049', 'ndv'='202', 'num_nulls'='0', 'min_value'='1900', 'max_value'='2100', 'data_size'='292196') + """ + + sql """ + alter table warehouse modify column 
w_warehouse_id set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPAAAAAAA', 'data_size'='320') + """ + + sql """ + alter table catalog_sales modify column cs_ext_list_price set stats ('row_count'='1439980416', 'ndv'='1160303', 'num_nulls'='7199542', 'min_value'='1.00', 'max_value'='30000.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ext_tax set stats ('row_count'='1439980416', 'ndv'='215267', 'num_nulls'='7200412', 'min_value'='0.00', 'max_value'='2673.27', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_quantity set stats ('row_count'='1439980416', 'ndv'='100', 'num_nulls'='7202885', 'min_value'='1', 'max_value'='100', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ship_cdemo_sk set stats ('row_count'='1439980416', 'ndv'='1916366', 'num_nulls'='7200151', 'min_value'='1', 'max_value'='1920800', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ship_customer_sk set stats ('row_count'='1439980416', 'ndv'='12157481', 'num_nulls'='7201507', 'min_value'='1', 'max_value'='12000000', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_company set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_mkt_desc set stats ('row_count'='42', 'ndv'='33', 'num_nulls'='0', 'min_value'='Arms increase controversial, present so', 'max_value'='Young tests could buy comfortable, local users; o', 'data_size'='2419') + """ + + sql """ + alter table call_center modify column cc_open_date_sk set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='2450794', 'max_value'='2451146', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_rec_end_date set stats ('row_count'='42', 
'ndv'='3', 'num_nulls'='21', 'min_value'='2000-01-01', 'max_value'='2001-12-31', 'data_size'='168') + """ + + sql """ + alter table catalog_returns modify column cr_order_number set stats ('row_count'='143996756', 'ndv'='93476424', 'num_nulls'='0', 'min_value'='2', 'max_value'='160000000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_return_amount set stats ('row_count'='143996756', 'ndv'='882831', 'num_nulls'='2880424', 'min_value'='0.00', 'max_value'='28805.04', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returned_date_sk set stats ('row_count'='143996756', 'ndv'='2108', 'num_nulls'='0', 'min_value'='2450821', 'max_value'='2452924', 'data_size'='1151974048') + """ + + sql """ + alter table income_band modify column ib_upper_bound set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='10000', 'max_value'='200000', 'data_size'='80') + """ + + sql """ + alter table catalog_page modify column cp_department set stats ('row_count'='30000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='DEPARTMENT', 'data_size'='297110') + """ + + sql """ + alter table catalog_page modify column cp_end_date_sk set stats ('row_count'='30000', 'ndv'='97', 'num_nulls'='302', 'min_value'='2450844', 'max_value'='2453186', 'data_size'='120000') + """ + + sql """ + alter table item modify column i_brand_id set stats ('row_count'='300000', 'ndv'='951', 'num_nulls'='763', 'min_value'='1001001', 'max_value'='10016017', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_category set stats ('row_count'='300000', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='Women', 'data_size'='1766742') + """ + + sql """ + alter table item modify column i_class_id set stats ('row_count'='300000', 'ndv'='16', 'num_nulls'='722', 'min_value'='1', 'max_value'='16', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_item_sk set stats 
('row_count'='300000', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='2400000') + """ + + sql """ + alter table item modify column i_manufact_id set stats ('row_count'='300000', 'ndv'='1005', 'num_nulls'='761', 'min_value'='1', 'max_value'='1000', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_wholesale_cost set stats ('row_count'='300000', 'ndv'='7243', 'num_nulls'='740', 'min_value'='0.02', 'max_value'='89.49', 'data_size'='1200000') + """ + + sql """ + alter table web_returns modify column wr_refunded_cdemo_sk set stats ('row_count'='71997522', 'ndv'='1916366', 'num_nulls'='3240352', 'min_value'='1', 'max_value'='1920800', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_return_tax set stats ('row_count'='71997522', 'ndv'='137392', 'num_nulls'='3237729', 'min_value'='0.00', 'max_value'='2551.16', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_returning_hdemo_sk set stats ('row_count'='71997522', 'ndv'='7251', 'num_nulls'='3238239', 'min_value'='1', 'max_value'='7200', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_web_page_sk set stats ('row_count'='71997522', 'ndv'='2984', 'num_nulls'='3240387', 'min_value'='1', 'max_value'='3000', 'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_class set stats ('row_count'='54', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='371') + """ + + sql """ + alter table web_site modify column web_zip set stats ('row_count'='54', 'ndv'='32', 'num_nulls'='0', 'min_value'='14593', 'max_value'='99431', 'data_size'='270') + """ + + sql """ + alter table promotion modify column p_channel_email set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1480') + """ + + sql """ + alter table promotion modify column p_item_sk set stats 
('row_count'='1500', 'ndv'='1467', 'num_nulls'='19', 'min_value'='184', 'max_value'='299990', 'data_size'='12000') + """ + + sql """ + alter table promotion modify column p_promo_name set stats ('row_count'='1500', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='5896') + """ + + sql """ + alter table web_sales modify column ws_ext_discount_amt set stats ('row_count'='720000376', 'ndv'='1093513', 'num_nulls'='179851', 'min_value'='0.00', 'max_value'='29982.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ext_list_price set stats ('row_count'='720000376', 'ndv'='1160303', 'num_nulls'='179866', 'min_value'='1.00', 'max_value'='30000.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_wholesale_cost set stats ('row_count'='720000376', 'ndv'='9905', 'num_nulls'='179834', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='2880001504') + """ + + sql """ + alter table store modify column s_market_manager set stats ('row_count'='1002', 'ndv'='732', 'num_nulls'='0', 'min_value'='', 'max_value'='Zane Perez', 'data_size'='12823') + """ + + sql """ + alter table store modify column s_number_employees set stats ('row_count'='1002', 'ndv'='101', 'num_nulls'='8', 'min_value'='200', 'max_value'='300', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_rec_end_date set stats ('row_count'='1002', 'ndv'='3', 'num_nulls'='501', 'min_value'='1999-03-13', 'max_value'='2001-03-12', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_rec_start_date set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='7', 'min_value'='1997-03-13', 'max_value'='2001-03-13', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_suite_number set stats ('row_count'='1002', 'ndv'='76', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite Y', 'data_size'='7866') + """ + + sql """ + alter table time_dim modify column t_hour set stats 
('row_count'='86400', 'ndv'='24', 'num_nulls'='0', 'min_value'='0', 'max_value'='23', 'data_size'='345600') + """ + + sql """ + alter table time_dim modify column t_shift set stats ('row_count'='86400', 'ndv'='3', 'num_nulls'='0', 'min_value'='first', 'max_value'='third', 'data_size'='460800') + """ + + sql """ + alter table web_page modify column wp_link_count set stats ('row_count'='3000', 'ndv'='24', 'num_nulls'='27', 'min_value'='2', 'max_value'='25', 'data_size'='12000') + """ + + sql """ + alter table web_page modify column wp_rec_end_date set stats ('row_count'='3000', 'ndv'='3', 'num_nulls'='1500', 'min_value'='1999-09-03', 'max_value'='2001-09-02', 'data_size'='12000') + """ + + sql """ + alter table store_returns modify column sr_cdemo_sk set stats ('row_count'='287999764', 'ndv'='1916366', 'num_nulls'='10076902', 'min_value'='1', 'max_value'='1920800', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_item_sk set stats ('row_count'='287999764', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_cdemo_sk set stats ('row_count'='2879987999', 'ndv'='1916366', 'num_nulls'='129602155', 'min_value'='1', 'max_value'='1920800', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_ext_discount_amt set stats ('row_count'='2879987999', 'ndv'='1161208', 'num_nulls'='129609101', 'min_value'='0.00', 'max_value'='19778.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_ext_wholesale_cost set stats ('row_count'='2879987999', 'ndv'='393180', 'num_nulls'='129595018', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_list_price set stats ('row_count'='2879987999', 'ndv'='19640', 'num_nulls'='129597020', 'min_value'='1.00', 'max_value'='200.00', 'data_size'='11519951996') + 
""" + + sql """ + alter table store_sales modify column ss_net_paid set stats ('row_count'='2879987999', 'ndv'='1288646', 'num_nulls'='129599407', 'min_value'='0.00', 'max_value'='19972.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_sold_date_sk set stats ('row_count'='2879987999', 'ndv'='1820', 'num_nulls'='129600843', 'min_value'='2450816', 'max_value'='2452642', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_sold_time_sk set stats ('row_count'='2879987999', 'ndv'='47252', 'num_nulls'='129593012', 'min_value'='28800', 'max_value'='75599', 'data_size'='23039903992') + """ + + sql """ + alter table ship_mode modify column sm_carrier set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AIRBORNE', 'max_value'='ZOUROS', 'data_size'='133') + """ + + sql """ + alter table customer modify column c_birth_year set stats ('row_count'='12000000', 'ndv'='69', 'num_nulls'='419584', 'min_value'='1924', 'max_value'='1992', 'data_size'='48000000') + """ + + sql """ + alter table customer modify column c_login set stats ('row_count'='12000000', 'ndv'='1', 'num_nulls'='0', 'min_value'='', 'max_value'='', 'data_size'='0') + """ + + sql """ + alter table customer modify column c_salutation set stats ('row_count'='12000000', 'ndv'='7', 'num_nulls'='0', 'min_value'='', 'max_value'='Sir', 'data_size'='37544445') + """ + + sql """ + alter table reason modify column r_reason_desc set stats ('row_count'='65', 'ndv'='64', 'num_nulls'='0', 'min_value'='Did not fit', 'max_value'='unauthoized purchase', 'data_size'='848') + """ + + sql """ + alter table date_dim modify column d_current_year set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_dom set stats ('row_count'='73049', 'ndv'='31', 'num_nulls'='0', 'min_value'='1', 'max_value'='31', 'data_size'='292196') + """ + + 
sql """ + alter table date_dim modify column d_same_day_lq set stats ('row_count'='73049', 'ndv'='72231', 'num_nulls'='0', 'min_value'='2414930', 'max_value'='2487978', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_week_seq set stats ('row_count'='73049', 'ndv'='10448', 'num_nulls'='0', 'min_value'='1', 'max_value'='10436', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_weekend set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table warehouse modify column w_zip set stats ('row_count'='20', 'ndv'='18', 'num_nulls'='0', 'min_value'='19231', 'max_value'='89275', 'data_size'='100') + """ + + sql """ + alter table catalog_sales modify column cs_catalog_page_sk set stats ('row_count'='1439980416', 'ndv'='17005', 'num_nulls'='7199032', 'min_value'='1', 'max_value'='25207', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_coupon_amt set stats ('row_count'='1439980416', 'ndv'='1578778', 'num_nulls'='7198116', 'min_value'='0.00', 'max_value'='28730.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_list_price set stats ('row_count'='1439980416', 'ndv'='29396', 'num_nulls'='7201549', 'min_value'='1.00', 'max_value'='300.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_net_profit set stats ('row_count'='1439980416', 'ndv'='2058398', 'num_nulls'='0', 'min_value'='-10000.00', 'max_value'='19962.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_order_number set stats ('row_count'='1439980416', 'ndv'='159051824', 'num_nulls'='0', 'min_value'='1', 'max_value'='160000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ship_hdemo_sk set stats ('row_count'='1439980416', 'ndv'='7251', 'num_nulls'='7201542', 
'min_value'='1', 'max_value'='7200', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_call_center_sk set stats ('row_count'='42', 'ndv'='42', 'num_nulls'='0', 'min_value'='1', 'max_value'='42', 'data_size'='336') + """ + + sql """ + alter table call_center modify column cc_city set stats ('row_count'='42', 'ndv'='17', 'num_nulls'='0', 'min_value'='Antioch', 'max_value'='Spring Hill', 'data_size'='386') + """ + + sql """ + alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_gmt_offset set stats ('row_count'='42', 'ndv'='4', 'num_nulls'='0', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_hours set stats ('row_count'='42', 'ndv'='3', 'num_nulls'='0', 'min_value'='8AM-12AM', 'max_value'='8AM-8AM', 'data_size'='300') + """ + + sql """ + alter table call_center modify column cc_street_number set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='38', 'max_value'='999', 'data_size'='120') + """ + + sql """ + alter table call_center modify column cc_tax_percentage set stats ('row_count'='42', 'ndv'='12', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='0.12', 'data_size'='168') + """ + + sql """ + alter table inventory modify column inv_date_sk set stats ('row_count'='783000000', 'ndv'='261', 'num_nulls'='0', 'min_value'='2450815', 'max_value'='2452635', 'data_size'='6264000000') + """ + + sql """ + alter table inventory modify column inv_item_sk set stats ('row_count'='783000000', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='6264000000') + """ + + sql """ + alter table catalog_returns modify column cr_fee set stats ('row_count'='143996756', 'ndv'='9958', 'num_nulls'='2882168', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='575987024') + """ + + sql """ + 
alter table catalog_returns modify column cr_return_quantity set stats ('row_count'='143996756', 'ndv'='100', 'num_nulls'='2878774', 'min_value'='1', 'max_value'='100', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returned_time_sk set stats ('row_count'='143996756', 'ndv'='87677', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_dep_count set stats ('row_count'='7200', 'ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='9', 'data_size'='28800') + """ + + sql """ + alter table customer_address modify column ca_county set stats ('row_count'='6000000', 'ndv'='1825', 'num_nulls'='0', 'min_value'='', 'max_value'='Ziebach County', 'data_size'='81254984') + """ + + sql """ + alter table income_band modify column ib_lower_bound set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='0', 'max_value'='190001', 'data_size'='80') + """ + + sql """ + alter table item modify column i_category_id set stats ('row_count'='300000', 'ndv'='10', 'num_nulls'='766', 'min_value'='1', 'max_value'='10', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_class set stats ('row_count'='300000', 'ndv'='100', 'num_nulls'='0', 'min_value'='', 'max_value'='womens watch', 'data_size'='2331199') + """ + + sql """ + alter table item modify column i_container set stats ('row_count'='300000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='2094652') + """ + + sql """ + alter table item modify column i_current_price set stats ('row_count'='300000', 'ndv'='9685', 'num_nulls'='775', 'min_value'='0.09', 'max_value'='99.99', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_manager_id set stats ('row_count'='300000', 'ndv'='100', 'num_nulls'='744', 'min_value'='1', 'max_value'='100', 'data_size'='1200000') + """ + + sql """ + alter table item modify column 
i_size set stats ('row_count'='300000', 'ndv'='8', 'num_nulls'='0', 'min_value'='', 'max_value'='small', 'data_size'='1296134') + """ + + sql """ + alter table web_returns modify column wr_order_number set stats ('row_count'='71997522', 'ndv'='42383708', 'num_nulls'='0', 'min_value'='1', 'max_value'='60000000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_refunded_cash set stats ('row_count'='71997522', 'ndv'='955369', 'num_nulls'='3240493', 'min_value'='0.00', 'max_value'='26992.92', 'data_size'='287990088') + """ + + sql """ + alter table web_site modify column web_country set stats ('row_count'='54', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='689') + """ + + sql """ + alter table web_site modify column web_gmt_offset set stats ('row_count'='54', 'ndv'='4', 'num_nulls'='1', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_market_manager set stats ('row_count'='54', 'ndv'='46', 'num_nulls'='0', 'min_value'='', 'max_value'='Zachery Oneil', 'data_size'='691') + """ + + sql """ + alter table web_site modify column web_site_sk set stats ('row_count'='54', 'ndv'='54', 'num_nulls'='0', 'min_value'='1', 'max_value'='54', 'data_size'='432') + """ + + sql """ + alter table web_site modify column web_street_name set stats ('row_count'='54', 'ndv'='53', 'num_nulls'='0', 'min_value'='', 'max_value'='Wilson Ridge', 'data_size'='471') + """ + + sql """ + alter table web_site modify column web_tax_percentage set stats ('row_count'='54', 'ndv'='13', 'num_nulls'='1', 'min_value'='0.00', 'max_value'='0.12', 'data_size'='216') + """ + + sql """ + alter table promotion modify column p_channel_tv set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1481') + """ + + sql """ + alter table promotion modify column p_response_targe set stats ('row_count'='1500', 'ndv'='1', 
'num_nulls'='27', 'min_value'='1', 'max_value'='1', 'data_size'='6000') + """ + + sql """ + alter table web_sales modify column ws_bill_addr_sk set stats ('row_count'='720000376', 'ndv'='6015742', 'num_nulls'='179648', 'min_value'='1', 'max_value'='6000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ext_sales_price set stats ('row_count'='720000376', 'ndv'='1091003', 'num_nulls'='180023', 'min_value'='0.00', 'max_value'='29810.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_profit set stats ('row_count'='720000376', 'ndv'='2014057', 'num_nulls'='0', 'min_value'='-10000.00', 'max_value'='19840.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_promo_sk set stats ('row_count'='720000376', 'ndv'='1489', 'num_nulls'='180016', 'min_value'='1', 'max_value'='1500', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ship_customer_sk set stats ('row_count'='720000376', 'ndv'='12074547', 'num_nulls'='179966', 'min_value'='1', 'max_value'='12000000', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_division_name set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6965') + """ + + sql """ + alter table store modify column s_floor_space set stats ('row_count'='1002', 'ndv'='752', 'num_nulls'='6', 'min_value'='5002549', 'max_value'='9997773', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_tax_percentage set stats ('row_count'='1002', 'ndv'='12', 'num_nulls'='8', 'min_value'='0.00', 'max_value'='0.11', 'data_size'='4008') + """ + + sql """ + alter table time_dim modify column t_time_id set stats ('row_count'='86400', 'ndv'='85663', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAABAAA', 'max_value'='AAAAAAAAPPPPAAAA', 'data_size'='1382400') + """ + + sql """ + alter table time_dim modify column t_time_sk set 
stats ('row_count'='86400', 'ndv'='87677', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='691200') + """ + + sql """ + alter table store_returns modify column sr_fee set stats ('row_count'='287999764', 'ndv'='9958', 'num_nulls'='10081860', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_reason_sk set stats ('row_count'='287999764', 'ndv'='65', 'num_nulls'='10087936', 'min_value'='1', 'max_value'='65', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_store_credit set stats ('row_count'='287999764', 'ndv'='698161', 'num_nulls'='10077188', 'min_value'='0.00', 'max_value'='17792.48', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_ticket_number set stats ('row_count'='287999764', 'ndv'='168770768', 'num_nulls'='0', 'min_value'='1', 'max_value'='240000000', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_ext_list_price set stats ('row_count'='2879987999', 'ndv'='770971', 'num_nulls'='129593800', 'min_value'='1.00', 'max_value'='20000.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_ext_sales_price set stats ('row_count'='2879987999', 'ndv'='754248', 'num_nulls'='129589177', 'min_value'='0.00', 'max_value'='19972.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_net_profit set stats ('row_count'='2879987999', 'ndv'='1497362', 'num_nulls'='129572933', 'min_value'='-10000.00', 'max_value'='9986.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_promo_sk set stats ('row_count'='2879987999', 'ndv'='1489', 'num_nulls'='129597096', 'min_value'='1', 'max_value'='1500', 'data_size'='23039903992') + """ + + sql """ + alter table ship_mode modify column sm_code set stats ('row_count'='20', 'ndv'='4', 'num_nulls'='0', 
'min_value'='AIR', 'max_value'='SURFACE', 'data_size'='87') + """ + + sql """ + alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 'data_size'='252') + """ + + sql """ + alter table customer modify column c_current_hdemo_sk set stats ('row_count'='12000000', 'ndv'='7251', 'num_nulls'='418736', 'min_value'='1', 'max_value'='7200', 'data_size'='96000000') + """ + + sql """ + alter table dbgen_version modify column dv_create_date set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='2023-07-06', 'max_value'='2023-07-06', 'data_size'='4') + """ + + sql """ + alter table dbgen_version modify column dv_create_time set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='2017-05-13 00:00:00', 'max_value'='2017-05-13 00:00:00', 'data_size'='8') + """ +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query1.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query1.groovy new file mode 100644 index 00000000000000..914a0af2189b84 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query1.groovy @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query1") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'TN' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100""" + qt_ds_shape_1 ''' + explain shape plan + with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'TN' +and 
ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query10.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query10.groovy new file mode 100644 index 00000000000000..ea222751007140 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query10.groovy @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query10") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Fairfield County','Campbell County','Washtenaw County','Escambia County','Cleburne County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 3 and 3+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 3 ANd 3+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 3 and 3+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + 
cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100""" + qt_ds_shape_10 ''' + explain shape plan + select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Fairfield County','Campbell County','Washtenaw County','Escambia County','Cleburne County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 3 and 3+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 3 ANd 3+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 3 and 3+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query11.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query11.groovy new file mode 100644 index 00000000000000..09086cf80e9f82 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query11.groovy @@ -0,0 +1,198 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query11") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + 
,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1998 + and t_s_secyear.dyear = 1998+1 + and t_w_firstyear.dyear = 1998 + and t_w_secyear.dyear = 1998+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address +limit 100""" + qt_ds_shape_11 ''' + explain shape plan + with year_total as ( + select c_customer_id 
customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1998 + and t_s_secyear.dyear = 1998+1 + and t_w_firstyear.dyear = 1998 + and t_w_secyear.dyear = 1998+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query12.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query12.groovy new file mode 100644 index 00000000000000..ae0b8e7452158e --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query12.groovy @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query12") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Men', 'Books', 'Electronics') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-06-15' as date) + and (cast('2001-06-15' as date) + interval 30 day) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""" + qt_ds_shape_12 ''' + explain shape plan + select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Men', 'Books', 'Electronics') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-06-15' as date) + and (cast('2001-06-15' as date) + interval 30 day) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + 
,i_item_desc + ,revenueratio +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query13.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query13.groovy new file mode 100644 index 00000000000000..55883a36ae7e95 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query13.groovy @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query13") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = 'College' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('IL', 'TN', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WY', 'OH', 'ID') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MS', 'SC', 
'IA') + and ss_net_profit between 50 and 250 + )) +""" + qt_ds_shape_13 ''' + explain shape plan + select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = 'College' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('IL', 'TN', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WY', 'OH', 'ID') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MS', 'SC', 'IA') + and ss_net_profit between 50 and 250 + )) + + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query14.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query14.groovy new file mode 100644 index 00000000000000..753fec2e26658f --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query14.groovy @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query14") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + 
,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) + t where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), +avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) 
number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100""" + qt_ds_shape_14 ''' + explain shape plan + with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) + t where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), +avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) 
+ select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query15.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query15.groovy new file mode 100644 index 00000000000000..5ae2eb30d1ee1c --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query15.groovy @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query15") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2001 + group by ca_zip + order by ca_zip + limit 100""" + qt_ds_shape_15 ''' + explain shape plan + select ca_zip + 
,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2001 + group by ca_zip + order by ca_zip + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query16.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query16.groovy new file mode 100644 index 00000000000000..2e8c4098fa2e85 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query16.groovy @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query16") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + count(distinct cs_order_number) as "order count" + ,sum(cs_ext_ship_cost) as "total shipping cost" + ,sum(cs_net_profit) as "total net profit" +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2002-4-01' and + (cast('2002-4-01' as date) + interval 60 day) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'PA' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100""" + qt_ds_shape_16 ''' + explain shape plan + select + count(distinct cs_order_number) as "order count" + ,sum(cs_ext_ship_cost) as "total shipping cost" + ,sum(cs_net_profit) as "total net profit" +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2002-4-01' and + (cast('2002-4-01' as date) + interval 60 day) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = 
ca_address_sk +and ca_state = 'PA' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query17.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query17.groovy new file mode 100644 index 00000000000000..1c973e7d09aadc --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query17.groovy @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query17") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2001Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and 
cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100""" + qt_ds_shape_17 ''' + explain shape plan + select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2001Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query18.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query18.groovy new file mode 100644 index 00000000000000..8592f99318d195 --- /dev/null +++ 
b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query18.groovy @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query18") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as 
decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'F' and + cd1.cd_education_status = 'Primary' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (1,3,7,11,10,4) and + d_year = 2001 and + ca_state in ('AL','MO','TN' + ,'GA','MT','IN','CA') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100""" + qt_ds_shape_18 ''' + explain shape plan + select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'F' and + cd1.cd_education_status = 'Primary' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (1,3,7,11,10,4) and + d_year = 2001 and + ca_state in ('AL','MO','TN' + ,'GA','MT','IN','CA') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query19.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query19.groovy 
new file mode 100644 index 00000000000000..bb1dca6574e40d --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query19.groovy @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query19") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=14 + and d_moy=11 + and d_year=2002 + and ss_customer_sk = c_customer_sk 
+ and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """ + qt_ds_shape_19 ''' + explain shape plan + select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=14 + and d_moy=11 + and d_year=2002 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query2.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query2.groovy new file mode 100644 index 00000000000000..7ae18743e60774 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query2.groovy @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query2") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales) t), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from 
+ (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1""" + qt_ds_shape_2 ''' + explain shape plan + with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales) t), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + 
,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 1998+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query20.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query20.groovy new file mode 100644 index 00000000000000..914be027b5b4d8 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query20.groovy @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query20") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Books', 'Music', 'Sports') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2002-06-18' as date) + and (cast('2002-06-18' as date) + interval 30 day) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100""" + qt_ds_shape_20 ''' + explain shape plan + select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Books', 'Music', 'Sports') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2002-06-18' as date) + and (cast('2002-06-18' as date) + interval 30 day) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc 
+ ,revenueratio +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query21.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query21.groovy new file mode 100644 index 00000000000000..b8e7943e7fd265 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query21.groovy @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query21") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'SET enable_fold_constant_by_be = false' //plan shape will be different + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-06-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1999-06-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1999-06-22' as date) - interval 30 day) + and (cast ('1999-06-22' as date) + interval 30 day) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100""" + qt_ds_shape_21 ''' + explain shape plan + select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1999-06-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1999-06-22' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + 
where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1999-06-22' as date) - interval 30 day) + and (cast ('1999-06-22' as date) + interval 30 day) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query22.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query22.groovy new file mode 100644 index 00000000000000..28907748d9b2ba --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query22.groovy @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query22") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1200 and 1200 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100""" + qt_ds_shape_22 ''' + explain shape plan + select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1200 and 1200 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query23.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query23.groovy new file mode 100644 index 00000000000000..8a384adc63eb10 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query23.groovy @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query23") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + multi_sql """ + use ${db}; + set enable_nereids_planner=true; + set enable_nereids_distribute_planner=false; + set enable_fallback_to_original_planner=false; + set exec_mem_limit=21G; + set be_number_for_test=3; + set enable_runtime_filter_prune=false; + set parallel_pipeline_task_num=8; + set forbid_unknown_col_stats=false; + set enable_stats=true; + set runtime_filter_type=8; + set broadcast_row_count_limit = 30000000; + set enable_nereids_timeout = false; + set enable_pipeline_engine = true; + set disable_nereids_rules='PRUNE_EMPTY_PARTITION'; + set push_topn_to_agg = true; + set topn_opt_limit_threshold=1024; + """ + + def ds = """with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + 
and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk) t), +best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 7 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 7 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) t2 + limit 100""" + qt_ds_shape_23 ''' + explain shape plan + with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk) t), +best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select 
cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 7 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 7 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) t2 + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query24.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query24.groovy new file mode 100644 index 00000000000000..07aab6781c5a5c --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query24.groovy @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query24") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=5 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'aquamarine' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name +""" + qt_ds_shape_24 ''' + explain shape plan + with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address 
+where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=5 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'aquamarine' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name + + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query25.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query25.groovy new file mode 100644 index 00000000000000..f9b37733278bc4 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query25.groovy @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query25") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,max(ss_net_profit) as store_sales_profit + ,max(sr_net_loss) as store_returns_loss + ,max(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 1999 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""" + qt_ds_shape_25 ''' + explain shape plan + select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,max(ss_net_profit) as store_sales_profit + ,max(sr_net_loss) as store_returns_loss + ,max(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim 
d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 1999 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query26.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query26.groovy new file mode 100644 index 00000000000000..e599258eb5453b --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query26.groovy @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query26") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'M' and + cd_marital_status = 'W' and + cd_education_status = 'Unknown' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2002 + group by i_item_id + order by i_item_id + limit 100""" + qt_ds_shape_26 ''' + explain shape plan + select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'M' and + cd_marital_status = 'W' and + cd_education_status = 'Unknown' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2002 + group by i_item_id + order by i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query27.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query27.groovy 
new file mode 100644 index 00000000000000..312b29a2056d36 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query27.groovy @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query27") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + 
ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'W' and + cd_education_status = 'Secondary' and + d_year = 1999 and + s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100""" + qt_ds_shape_27 ''' + explain shape plan + select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'W' and + cd_education_status = 'Secondary' and + d_year = 1999 and + s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query28.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query28.groovy new file mode 100644 index 00000000000000..a8151135102b1c --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query28.groovy @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query28") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 107 and 107+10 + or ss_coupon_amt between 1319 and 1319+1000 + or ss_wholesale_cost between 60 and 60+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 23 and 23+10 + or ss_coupon_amt between 825 and 825+1000 + or ss_wholesale_cost between 43 and 43+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 74 and 74+10 + or ss_coupon_amt between 4381 and 4381+1000 + or ss_wholesale_cost between 57 and 57+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 89 and 89+10 + or ss_coupon_amt between 3117 and 3117+1000 + or ss_wholesale_cost between 68 and 
68+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 58 and 58+10 + or ss_coupon_amt between 9402 and 9402+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 64 and 64+10 + or ss_coupon_amt between 5792 and 5792+1000 + or ss_wholesale_cost between 73 and 73+20)) B6 +limit 100""" + qt_ds_shape_28 ''' + explain shape plan + select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 107 and 107+10 + or ss_coupon_amt between 1319 and 1319+1000 + or ss_wholesale_cost between 60 and 60+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 23 and 23+10 + or ss_coupon_amt between 825 and 825+1000 + or ss_wholesale_cost between 43 and 43+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 74 and 74+10 + or ss_coupon_amt between 4381 and 4381+1000 + or ss_wholesale_cost between 57 and 57+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 89 and 89+10 + or ss_coupon_amt between 3117 and 3117+1000 + or ss_wholesale_cost between 68 and 68+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + 
from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 58 and 58+10 + or ss_coupon_amt between 9402 and 9402+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 64 and 64+10 + or ss_coupon_amt between 5792 and 5792+1000 + or ss_wholesale_cost between 73 and 73+20)) B6 +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query29.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query29.groovy new file mode 100644 index 00000000000000..e1116a865dd8da --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query29.groovy @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query29") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,max(ss_quantity) as store_sales_quantity + ,max(sr_return_quantity) as store_returns_quantity + ,max(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1998 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1998,1998+1,1998+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100""" + qt_ds_shape_29 ''' + explain shape plan + select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,max(ss_quantity) as store_sales_quantity + ,max(sr_return_quantity) as store_returns_quantity + ,max(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + 
,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1998 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1998 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1998,1998+1,1998+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query3.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query3.groovy new file mode 100644 index 00000000000000..aace9720f393ff --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query3.groovy @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query3") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100""" + qt_ds_shape_3 ''' + explain shape plan + select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 816 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query30.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query30.groovy new file mode 100644 index 00000000000000..6afc169c8ae2dc --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query30.groovy @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query30") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return + from customer_total_return ctr1 + 
,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'AR' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return +limit 100""" + qt_ds_shape_30 ''' + explain shape plan + with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'AR' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query31.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query31.groovy new file mode 100644 index 00000000000000..bb11fdd0951d77 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query31.groovy @@ -0,0 +1,140 @@ +/* + * Licensed 
to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query31") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + 
,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by store_q2_q3_increase""" + qt_ds_shape_31 ''' + explain shape plan + with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + 
and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by store_q2_q3_increase + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query32.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query32.groovy new file mode 100644 index 00000000000000..37185f3ed80424 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query32.groovy @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query32") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + multi_sql """ + use ${db}; + set enable_nereids_planner=true; + set enable_nereids_distribute_planner=false; + set enable_fallback_to_original_planner=false; + set exec_mem_limit=21G; + set be_number_for_test=3; + set enable_runtime_filter_prune=false; + set parallel_pipeline_task_num=8; + set forbid_unknown_col_stats=false; + set enable_stats=true; + set runtime_filter_type=8; + set broadcast_row_count_limit = 30000000; + set enable_nereids_timeout = false; + set enable_pipeline_engine = true; + set disable_nereids_rules='PRUNE_EMPTY_PARTITION'; + set push_topn_to_agg = true; + set topn_opt_limit_threshold=1024; + """ + + def ds = """select sum(cs_ext_discount_amt) as "excess discount amount" +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 722 +and i_item_sk = cs_item_sk +and d_date between '2001-03-09' and + (cast('2001-03-09' as date) + interval 90 day) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '2001-03-09' and + (cast('2001-03-09' as date) + interval 90 day) + and d_date_sk = cs_sold_date_sk + ) +limit 100""" + qt_ds_shape_32 ''' + explain shape plan + select sum(cs_ext_discount_amt) as "excess discount amount" +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 722 +and i_item_sk = cs_item_sk +and d_date between '2001-03-09' and + (cast('2001-03-09' as date) + interval 90 day) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '2001-03-09' and + (cast('2001-03-09' as date) + interval 90 day) + and d_date_sk = cs_sold_date_sk + ) +limit 100 + ''' +} diff --git 
a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query33.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query33.groovy new file mode 100644 index 00000000000000..96697c0d984595 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query33.groovy @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query33") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select 
* from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100""" + qt_ds_shape_33 ''' + explain shape plan + with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query34.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query34.groovy new file mode 100644 index 00000000000000..c222d6b0bece02 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query34.groovy @@ -0,0 +1,98 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query34") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and 
(household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = '0-500') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number""" + qt_ds_shape_34 ''' + explain shape plan + select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = '0-500') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + 
order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query35.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query35.groovy new file mode 100644 index 00000000000000..f4bdeb2db8c61c --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query35.groovy @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query35") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + avg(cd_dep_count), + stddev_samp(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + stddev_samp(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + 
cd_dep_college_count + limit 100""" + qt_ds_shape_35 ''' + explain shape plan + select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + avg(cd_dep_count), + stddev_samp(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + stddev_samp(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query36.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query36.groovy new file mode 100644 index 00000000000000..a1310d235ec796 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query36.groovy @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query36") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('TN','TN','TN','TN', + 'TN','TN','TN','TN') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + 
,rank_within_parent + limit 100""" + qt_ds_shape_36 ''' + explain shape plan + select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('TN','TN','TN','TN', + 'TN','TN','TN','TN') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query37.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query37.groovy new file mode 100644 index 00000000000000..5d48768e9b866b --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query37.groovy @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query37") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 29 and 29 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-03-29' as date) and (cast('2002-03-29' as date) + interval 60 day) + and i_manufact_id in (705,742,777,944) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""" + qt_ds_shape_37 ''' + explain shape plan + select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 29 and 29 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2002-03-29' as date) and (cast('2002-03-29' as date) + interval 60 day) + and i_manufact_id in (705,742,777,944) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query38.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query38.groovy new file mode 100644 index 00000000000000..06e3857fb30f3f --- /dev/null +++ 
b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query38.groovy @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query38") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + multi_sql """ + use ${db}; + set enable_nereids_planner=true; + set enable_nereids_distribute_planner=false; + set enable_fallback_to_original_planner=false; + set exec_mem_limit=21G; + set be_number_for_test=3; + set enable_runtime_filter_prune=false; + set parallel_pipeline_task_num=8; + set forbid_unknown_col_stats=false; + set enable_stats=true; + set runtime_filter_type=8; + set broadcast_row_count_limit = 30000000; + set enable_nereids_timeout = false; + set enable_pipeline_engine = true; + set disable_nereids_rules='PRUNE_EMPTY_PARTITION'; + set push_topn_to_agg = true; + set topn_opt_limit_threshold=1024; + """ + + def ds = """select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + 
from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 +) hot_cust +limit 100""" + qt_ds_shape_38 ''' + explain shape plan + select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189 + 11 +) hot_cust +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query39.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query39.groovy new file mode 100644 index 00000000000000..a7e321c22656f8 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query39.groovy @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query39") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv 
inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov""" + qt_ds_shape_39 ''' + explain shape plan + with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2000 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query4.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query4.groovy new file mode 100644 index 00000000000000..f03af2536173de --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query4.groovy @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query4") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country 
customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and 
t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100""" + qt_ds_shape_4 ''' + explain shape plan + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + 
,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_c_firstyear.dyear = 1999 + and 
t_c_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_birth_country +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query40.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query40.groovy new file mode 100644 index 00000000000000..38cfafa6560581 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query40.groovy @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query40") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2001-05-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2001-05-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2001-05-02' as date) - interval 30 day) + and (cast ('2001-05-02' as date) + interval 30 day) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100""" + qt_ds_shape_40 ''' + explain shape plan + select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2001-05-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2001-05-02' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and 
cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2001-05-02' as date) - interval 30 day) + and (cast ('2001-05-02' as date) + interval 30 day) + group by + w_state,i_item_id + order by w_state,i_item_id +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query41.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query41.groovy new file mode 100644 index 00000000000000..b9c4a9231e533c --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query41.groovy @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query41") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select distinct(i_product_name) + from item i1 + where i_manufact_id between 704 and 704+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'forest' or i_color = 'lime') and + (i_units = 'Pallet' or i_units = 'Pound') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'navy' or i_color = 'slate') and + (i_units = 'Gross' or i_units = 'Bunch') and + (i_size = 'extra large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'powder' or i_color = 'sky') and + (i_units = 'Dozen' or i_units = 'Lb') and + (i_size = 'N/A' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'maroon' or i_color = 'smoke') and + (i_units = 'Ounce' or i_units = 'Case') and + (i_size = 'economy' or i_size = 'small') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'dark' or i_color = 'aquamarine') and + (i_units = 'Ton' or i_units = 'Tbl') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'frosted' or i_color = 'plum') and + (i_units = 'Dram' or i_units = 'Box') and + (i_size = 'extra large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'papaya' or i_color = 'peach') 
and + (i_units = 'Bundle' or i_units = 'Carton') and + (i_size = 'N/A' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'firebrick' or i_color = 'sienna') and + (i_units = 'Cup' or i_units = 'Each') and + (i_size = 'economy' or i_size = 'small') + )))) > 0 + order by i_product_name + limit 100""" + qt_ds_shape_41 ''' + explain shape plan + select distinct(i_product_name) + from item i1 + where i_manufact_id between 704 and 704+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'forest' or i_color = 'lime') and + (i_units = 'Pallet' or i_units = 'Pound') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'navy' or i_color = 'slate') and + (i_units = 'Gross' or i_units = 'Bunch') and + (i_size = 'extra large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'powder' or i_color = 'sky') and + (i_units = 'Dozen' or i_units = 'Lb') and + (i_size = 'N/A' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'maroon' or i_color = 'smoke') and + (i_units = 'Ounce' or i_units = 'Case') and + (i_size = 'economy' or i_size = 'small') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'dark' or i_color = 'aquamarine') and + (i_units = 'Ton' or i_units = 'Tbl') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'frosted' or i_color = 'plum') and + (i_units = 'Dram' or i_units = 'Box') and + (i_size = 'extra large' or i_size = 'petite') + ) or + (i_category = 'Men' and + (i_color = 'papaya' or i_color = 'peach') and + (i_units = 'Bundle' or i_units = 'Carton') and + (i_size = 'N/A' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'firebrick' or i_color = 'sienna') and + (i_units = 'Cup' or i_units = 'Each') and + (i_size = 'economy' or i_size = 'small') + )))) > 0 + order by i_product_name + limit 100 + ''' +} 
diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query42.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query42.groovy new file mode 100644 index 00000000000000..e4f6a9da4b376a --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query42.groovy @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query42") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 """ + qt_ds_shape_42 ''' + explain shape plan + select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query43.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query43.groovy new file mode 100644 index 00000000000000..e4231b09aa6738 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query43.groovy @@ -0,0 +1,74 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query43") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then 
ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -5 and + d_year = 2000 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100""" + qt_ds_shape_43 ''' + explain shape plan + select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -5 and + d_year = 2000 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query44.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query44.groovy new file mode 100644 index 00000000000000..fc972137d59b27 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query44.groovy @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query44") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having 
avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""" + qt_ds_shape_44 ''' + explain shape plan + select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query45.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query45.groovy new file mode 100644 index 00000000000000..ee73c4d7287a41 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query45.groovy @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query45") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + limit 100""" + qt_ds_shape_45 ''' + explain 
shape plan + select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query46.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query46.groovy new file mode 100644 index 00000000000000..dd33c8ae9192d7 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query46.groovy @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query46") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 8 or + household_demographics.hd_vehicle_count= 0) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Midway','Fairview','Fairview','Midway','Fairview') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100""" + qt_ds_shape_46 ''' + explain shape plan + select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + 
,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 8 or + household_demographics.hd_vehicle_count= 0) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Midway','Fairview','Fairview','Midway','Fairview') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query47.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query47.groovy new file mode 100644 index 00000000000000..d523df26a65240 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query47.groovy @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query47") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.s_store_name, v1.s_company_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where 
v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, nsum + limit 100""" + qt_ds_shape_47 ''' + explain shape plan + with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.s_store_name, v1.s_company_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and 
+ avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, nsum + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query48.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query48.groovy new file mode 100644 index 00000000000000..3a2258662d5e48 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query48.groovy @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query48") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Secondary' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'Advanced Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('ND', 'NY', 'SD') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MD', 'GA', 'KS') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('CO', 'MN', 'NC') + and ss_net_profit between 50 and 25000 + ) + ) +""" + qt_ds_shape_48 ''' + explain shape plan + select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, 
date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Secondary' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'Advanced Degree' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('ND', 'NY', 'SD') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('MD', 'GA', 'KS') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('CO', 'MN', 'NC') + and ss_net_profit between 50 and 25000 + ) + ) + + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query49.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query49.groovy new file mode 100644 index 00000000000000..a250ee0abc49cc --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query49.groovy @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query49") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid 
> 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number 
= sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + t order by 1,4,5,2 + limit 100""" + qt_ds_shape_49 ''' + explain shape plan + select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 11 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + 
cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 11 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 11 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + t order by 1,4,5,2 + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query5.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query5.groovy new file mode 100644 index 
00000000000000..1b549f2707ae23 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query5.groovy @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query5") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as 
net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + interval 14 day) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + interval 14 day) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from 
web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + interval 14 day) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , concat('store', s_store_id) id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , concat('catalog_page', cp_catalog_page_id) id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , concat('web_site', web_site_id) id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""" + qt_ds_shape_5 ''' + explain shape plan + with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + interval 14 day) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as 
returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + interval 14 day) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-19' as date) + and (cast('2000-08-19' as date) + interval 14 day) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , concat('store', s_store_id) id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union 
all + select 'catalog channel' as channel + , concat('catalog_page', cp_catalog_page_id) id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , concat('web_site', web_site_id) id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query50.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query50.groovy new file mode 100644 index 00000000000000..8ccc569590b75f --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query50.groovy @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query50") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as ">120 days" +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2001 +and d2.d_moy = 8 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + 
,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100""" + qt_ds_shape_50 ''' + explain shape plan + select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as ">120 days" +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2001 +and d2.d_moy = 8 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query51.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query51.groovy new file mode 100644 index 00000000000000..a98b98ec910062 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query51.groovy @@ -0,0 +1,126 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query51") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and 
current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100""" + qt_ds_shape_51 ''' + explain shape plan + WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between 
unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query52.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query52.groovy new file mode 100644 index 00000000000000..880f3afd167272 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query52.groovy @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query52") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=2000 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 """ + qt_ds_shape_52 ''' + explain shape plan + select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=2000 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query53.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query53.groovy new file mode 100644 index 00000000000000..42e7a3474ba745 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query53.groovy @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query53") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1186,1186+1,1186+2,1186+3,1186+4,1186+5,1186+6,1186+7,1186+8,1186+9,1186+10,1186+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg 
#14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100""" + qt_ds_shape_53 ''' + explain shape plan + select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1186,1186+1,1186+2,1186+3,1186+4,1186+5,1186+6,1186+7,1186+8,1186+9,1186+10,1186+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query54.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query54.groovy new file mode 100644 index 00000000000000..ce8295e9299723 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query54.groovy @@ -0,0 +1,148 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query54") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=12' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Music' + and i_class = 'country' + and c_customer_sk 
= cs_or_ws_sales.customer_sk + and d_moy = 1 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 1) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 1) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""" + qt_ds_shape_54 ''' + explain shape plan + with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Music' + and i_class = 'country' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 1 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 1) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 1) + 
group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query55.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query55.groovy new file mode 100644 index 00000000000000..70fe7265786519 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query55.groovy @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query55") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=52 + and d_moy=11 + and d_year=2000 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 """ + qt_ds_shape_55 ''' + explain shape plan + select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=52 + and d_moy=11 + and d_year=2000 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query56.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query56.groovy new file mode 100644 index 00000000000000..2aaceceedb6e45 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query56.groovy @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query56") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and 
d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""" + qt_ds_shape_56 ''' + explain shape plan + with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select 
i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query57.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query57.groovy new file mode 100644 index 00000000000000..47ca56f0650d3a --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query57.groovy @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query57") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.cc_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100""" + qt_ds_shape_57 ''' + explain shape plan + with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.cc_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, avg_monthly_sales + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query58.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query58.groovy new file mode 100644 index 00000000000000..0cb76fca23534c --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query58.groovy @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query58") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-06-16')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-06-16')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2001-06-16')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + 
,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100""" + qt_ds_shape_58 ''' + explain shape plan + with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-06-16')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2001-06-16')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2001-06-16')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + 
,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query59.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query59.groovy new file mode 100644 index 00000000000000..3b3ecfd9068fcc --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query59.groovy @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query59") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + wss.ss_store_sk = s_store_sk and + 
d_month_seq between 1195 and 1195 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + wss.ss_store_sk = s_store_sk and + d_month_seq between 1195+ 12 and 1195 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100""" + qt_ds_shape_59 ''' + explain shape plan + with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + wss.ss_store_sk = s_store_sk and + d_month_seq between 1195 and 1195 + 11) y, + (select s_store_name 
s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + wss.ss_store_sk = s_store_sk and + d_month_seq between 1195+ 12 and 1195 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query6.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query6.groovy new file mode 100644 index 00000000000000..b1a51aa274d4f5 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query6.groovy @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query6") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""" + qt_ds_shape_6 ''' + explain shape plan + select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100 + ''' +} diff --git 
a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query60.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query60.groovy new file mode 100644 index 00000000000000..f323b96ed5683a --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query60.groovy @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query60") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Jewelry')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Jewelry')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Jewelry')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs 
+ union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100""" + qt_ds_shape_60 ''' + explain shape plan + with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Jewelry')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Jewelry')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Jewelry')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query61.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query61.groovy new file mode 100644 index 00000000000000..de78bd3fe46242 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query61.groovy @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query61") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and 
s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) all_sales +order by promotions, total +limit 100""" + qt_ds_shape_61 ''' + explain shape plan + select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) all_sales +order by promotions, total +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query62.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query62.groovy new file mode 100644 index 00000000000000..de6a1615115134 --- /dev/null +++ 
b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query62.groovy @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query62") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) 
then 1 else 0 end) as "61-90 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as ">120 days" +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1223 and 1223 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100""" + qt_ds_shape_62 ''' + explain shape plan + select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as ">120 days" +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1223 and 1223 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query63.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query63.groovy new file mode 100644 index 
00000000000000..34b10eaefd827a --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query63.groovy @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query63") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in 
(1222,1222+1,1222+2,1222+3,1222+4,1222+5,1222+6,1222+7,1222+8,1222+9,1222+10,1222+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100""" + qt_ds_shape_63 ''' + explain shape plan + select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1222,1222+1,1222+2,1222+3,1222+4,1222+5,1222+6,1222+7,1222+8,1222+9,1222+10,1222+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query64.groovy 
b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query64.groovy new file mode 100644 index 00000000000000..ef89e0bd0a7bc5 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query64.groovy @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query64") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + sql "set memo_max_group_expression_size = 1000000" + + def ds = """with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + 
ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('orange','lace','lawn','misty','blush','pink') and + i_current_price between 48 and 48 + 10 and + i_current_price between 48 + 1 and 48 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 1999 and + cs2.syear = 1999 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1""" +// qt_ds_shape_64 ''' +// explain shape plan +// with cs_ui as +// (select cs_item_sk +// ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund +// from catalog_sales +// 
,catalog_returns +// where cs_item_sk = cr_item_sk +// and cs_order_number = cr_order_number +// group by cs_item_sk +// having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +// cross_sales as +// (select i_product_name product_name +// ,i_item_sk item_sk +// ,s_store_name store_name +// ,s_zip store_zip +// ,ad1.ca_street_number b_street_number +// ,ad1.ca_street_name b_street_name +// ,ad1.ca_city b_city +// ,ad1.ca_zip b_zip +// ,ad2.ca_street_number c_street_number +// ,ad2.ca_street_name c_street_name +// ,ad2.ca_city c_city +// ,ad2.ca_zip c_zip +// ,d1.d_year as syear +// ,d2.d_year as fsyear +// ,d3.d_year s2year +// ,count(*) cnt +// ,sum(ss_wholesale_cost) s1 +// ,sum(ss_list_price) s2 +// ,sum(ss_coupon_amt) s3 +// FROM store_sales +// ,store_returns +// ,cs_ui +// ,date_dim d1 +// ,date_dim d2 +// ,date_dim d3 +// ,store +// ,customer +// ,customer_demographics cd1 +// ,customer_demographics cd2 +// ,promotion +// ,household_demographics hd1 +// ,household_demographics hd2 +// ,customer_address ad1 +// ,customer_address ad2 +// ,income_band ib1 +// ,income_band ib2 +// ,item +// WHERE ss_store_sk = s_store_sk AND +// ss_sold_date_sk = d1.d_date_sk AND +// ss_customer_sk = c_customer_sk AND +// ss_cdemo_sk= cd1.cd_demo_sk AND +// ss_hdemo_sk = hd1.hd_demo_sk AND +// ss_addr_sk = ad1.ca_address_sk and +// ss_item_sk = i_item_sk and +// ss_item_sk = sr_item_sk and +// ss_ticket_number = sr_ticket_number and +// ss_item_sk = cs_ui.cs_item_sk and +// c_current_cdemo_sk = cd2.cd_demo_sk AND +// c_current_hdemo_sk = hd2.hd_demo_sk AND +// c_current_addr_sk = ad2.ca_address_sk and +// c_first_sales_date_sk = d2.d_date_sk and +// c_first_shipto_date_sk = d3.d_date_sk and +// ss_promo_sk = p_promo_sk and +// hd1.hd_income_band_sk = ib1.ib_income_band_sk and +// hd2.hd_income_band_sk = ib2.ib_income_band_sk and +// cd1.cd_marital_status <> cd2.cd_marital_status and +// i_color in ('orange','lace','lawn','misty','blush','pink') 
and +// i_current_price between 48 and 48 + 10 and +// i_current_price between 48 + 1 and 48 + 15 +// group by i_product_name +// ,i_item_sk +// ,s_store_name +// ,s_zip +// ,ad1.ca_street_number +// ,ad1.ca_street_name +// ,ad1.ca_city +// ,ad1.ca_zip +// ,ad2.ca_street_number +// ,ad2.ca_street_name +// ,ad2.ca_city +// ,ad2.ca_zip +// ,d1.d_year +// ,d2.d_year +// ,d3.d_year +// ) +// select cs1.product_name +// ,cs1.store_name +// ,cs1.store_zip +// ,cs1.b_street_number +// ,cs1.b_street_name +// ,cs1.b_city +// ,cs1.b_zip +// ,cs1.c_street_number +// ,cs1.c_street_name +// ,cs1.c_city +// ,cs1.c_zip +// ,cs1.syear +// ,cs1.cnt +// ,cs1.s1 as s11 +// ,cs1.s2 as s21 +// ,cs1.s3 as s31 +// ,cs2.s1 as s12 +// ,cs2.s2 as s22 +// ,cs2.s3 as s32 +// ,cs2.syear +// ,cs2.cnt +// from cross_sales cs1,cross_sales cs2 +// where cs1.item_sk=cs2.item_sk and +// cs1.syear = 1999 and +// cs2.syear = 1999 + 1 and +// cs2.cnt <= cs1.cnt and +// cs1.store_name = cs2.store_name and +// cs1.store_zip = cs2.store_zip +// order by cs1.product_name +// ,cs1.store_name +// ,cs2.cnt +// ,cs1.s1 +// ,cs2.s1 +// ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query65.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query65.groovy new file mode 100644 index 00000000000000..4aeb9cb88d9c5d --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query65.groovy @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query65") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100""" + qt_ds_shape_65 ''' + explain shape plan + select + s_store_name, + 
i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query66.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query66.groovy new file mode 100644 index 00000000000000..3ed91231c6c2a5 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query66.groovy @@ -0,0 +1,476 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query66") {/* Plan-shape check for query 66: monthly sales and net sums per warehouse, computed separately over web_sales and catalog_sales, merged with union all and summed again. Looks up the database name from the parent directory of this file, returns immediately when isCloudMode() is true, otherwise switches to that database, issues the set statements below, and passes the explain shape plan statement to qt_ds_shape_66. NOTE(review): presumably qt_ds_shape_66 compares the returned plan with a recorded .out file; confirm against the regression framework. NOTE(review): ds is assigned the bare query text, but nothing later in this suite reads it. */ + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + 
,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,concat(concat('ORIENTAL ', ','), ' BOXBUNDLES') as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as may_net + ,sum(case 
when d_moy = 6 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 42970 and 42970+28800 + and sm_carrier in ('ORIENTAL','BOXBUNDLES') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,concat(concat('ORIENTAL ', ','), ' BOXBUNDLES') as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_list_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_list_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case 
when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 42970 AND 42970+28800 + and sm_carrier in ('ORIENTAL','BOXBUNDLES') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100""" + qt_ds_shape_66 ''' + explain shape 
plan + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,concat(concat('ORIENTAL ', ','), ' BOXBUNDLES') as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_sales_price* ws_quantity else 0 
end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid_inc_ship * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + 
,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 42970 and 42970+28800 + and sm_carrier in ('ORIENTAL','BOXBUNDLES') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,concat(concat('ORIENTAL ', ','), ' BOXBUNDLES') as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_ext_list_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_ext_list_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_ext_list_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_ext_list_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_ext_list_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_ext_list_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_ext_list_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_ext_list_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_ext_list_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_ext_list_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_ext_list_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_ext_list_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid * cs_quantity else 0 end) as apr_net + 
,sum(case when d_moy = 5 + then cs_net_paid * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 42970 AND 42970+28800 + and sm_carrier in ('ORIENTAL','BOXBUNDLES') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query67.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query67.groovy new file mode 100644 index 00000000000000..dd7d2c1bfec035 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query67.groovy @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query67") {/* Plan-shape check for TPC-DS query 67: ranks sumsales per i_category over a rollup aggregation of store_sales joined with date_dim, store and item, keeping rows with rk <= 100. Looks up the database name from the parent directory of this file, returns immediately when isCloudMode() is true, otherwise switches to that database, issues the set statements below, and passes the explain shape plan statement to qt_ds_shape_67. NOTE(review): presumably qt_ds_shape_67 compares the returned plan with a recorded .out file; confirm against the regression framework. NOTE(review): ds is assigned the bare query text, but nothing later in this suite reads it. */ + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1217 and 1217+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name 
+ ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100""" + qt_ds_shape_67 ''' + explain shape plan + select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1217 and 1217+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query68.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query68.groovy new file mode 100644 index 00000000000000..6e721e5eabf45d --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query68.groovy @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query68") {/* Plan-shape check for TPC-DS query 68: per-ticket sums from store_sales (subquery dn) joined with customer and the current customer_address, keeping rows whose current ca_city differs from bought_city. Looks up the database name from the parent directory of this file, returns immediately when isCloudMode() is true, otherwise switches to that database, issues the set statements below, and passes the explain shape plan statement to qt_ds_shape_68. NOTE(review): presumably qt_ds_shape_68 compares the returned plan with a recorded .out file; confirm against the regression framework. NOTE(review): ds is assigned the bare query text, but nothing later in this suite reads it. */ + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= 4) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Fairview','Midway') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""" + qt_ds_shape_68 ''' + explain 
shape plan + select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= 4) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Fairview','Midway') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query69.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query69.groovy new file mode 100644 index 00000000000000..dafcb98e9265aa --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query69.groovy @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query69") {/* Plan-shape check for TPC-DS query 69: customer demographic counts filtered by an exists subquery on store_sales and two not exists subqueries on web_sales and catalog_sales. Looks up the database name from the parent directory of this file, returns immediately when isCloudMode() is true, otherwise switches to that database, issues the set statements below, and passes the explain shape plan statement to qt_ds_shape_69. NOTE(review): presumably qt_ds_shape_69 compares the returned plan with a recorded .out file; confirm against the regression framework. NOTE(review): ds is assigned the bare query text, but nothing later in this suite reads it. */ + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('IL','TX','ME') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 
1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100""" + qt_ds_shape_69 ''' + explain shape plan + select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('IL','TX','ME') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 1+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 1+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 1+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query7.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query7.groovy new file mode 100644 index 00000000000000..af7f9913c68037 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query7.groovy @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query7") {/* Plan-shape check for TPC-DS query 7: averages of four store_sales measures per i_item_id after joining customer_demographics, date_dim, item and promotion. Looks up the database name from the parent directory of this file, returns immediately when isCloudMode() is true, otherwise switches to that database, issues the set statements below, and passes the explain shape plan statement to qt_ds_shape_7. NOTE(review): presumably qt_ds_shape_7 compares the returned plan with a recorded .out file; confirm against the regression framework. NOTE(review): ds is assigned the bare query text, but nothing later in this suite reads it. */ + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100""" + qt_ds_shape_7 ''' + explain shape plan + select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from 
store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query70.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query70.groovy new file mode 100644 index 00000000000000..beab5d0277e690 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query70.groovy @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query70") {/* Plan-shape check for TPC-DS query 70: sum(ss_net_profit) grouped by rollup(s_state,s_county) with a rank_within_parent window, restricted to states returned by a ranking subquery (ranking <= 5). Looks up the database name from the parent directory of this file, returns immediately when isCloudMode() is true, otherwise switches to that database, issues the set statements below, and passes the explain shape plan statement to qt_ds_shape_70. NOTE(review): presumably qt_ds_shape_70 compares the returned plan with a recorded .out file; confirm against the regression framework. NOTE(review): ds is assigned the bare query text, but nothing later in this suite reads it. */ + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1220 and 1220+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1220 and 1220+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100""" + qt_ds_shape_70 ''' + explain shape plan + select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from 
+ store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1220 and 1220+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1220 and 1220+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query71.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query71.groovy new file mode 100644 index 00000000000000..d52f597c6b258d --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query71.groovy @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query71") {/* Plan-shape check for TPC-DS query 71: sum(ext_price) per brand, t_hour and t_minute over a union all of web_sales, catalog_sales and store_sales rows, joined with item and time_dim. Unlike its neighbouring suites, this query has no limit clause. Looks up the database name from the parent directory of this file, returns immediately when isCloudMode() is true, otherwise switches to that database, issues the set statements below, and passes the explain shape plan statement to qt_ds_shape_71. NOTE(review): presumably qt_ds_shape_71 compares the returned plan with a recorded .out file; confirm against the regression framework. NOTE(review): ds is assigned the bare query text, but nothing later in this suite reads it. */ + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2002 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2002 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2002 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + """ + qt_ds_shape_71 ''' + explain shape plan + select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as 
sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2002 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2002 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2002 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query72.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query72.groovy new file mode 100644 index 00000000000000..aa2d674cc33f16 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query72.groovy @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query72") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and (d3.d_date > (d1.d_date + INTERVAL '5' DAY)) + and hd_buy_potential = '1001-5000' + and d1.d_year = 1998 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100""" + qt_ds_shape_72 ''' + explain shape plan + select 
i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and (d3.d_date > (d1.d_date + INTERVAL '5' DAY)) + and hd_buy_potential = '1001-5000' + and d1.d_year = 1998 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query73.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query73.groovy new file mode 100644 index 00000000000000..bac050095a8835 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query73.groovy @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query73") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Williamson County','Williamson County','Williamson 
County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc""" + qt_ds_shape_73 ''' + explain shape plan + select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query74.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query74.groovy new file mode 100644 index 00000000000000..a0ef28422cd655 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query74.groovy @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query74") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in 
(1999,1999+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 1999 + and t_s_secyear.year = 1999+1 + and t_w_firstyear.year = 1999 + and t_w_secyear.year = 1999+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 1,3,2 +limit 100""" + qt_ds_shape_74 ''' + explain shape plan + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (1999,1999+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, 
t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 1999 + and t_s_secyear.year = 1999+1 + and t_w_firstyear.year = 1999 + and t_w_secyear.year = 1999+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 1,3,2 +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query75.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query75.groovy new file mode 100644 index 00000000000000..5109f9a2acbbe3 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query75.groovy @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query75") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - 
COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100""" + qt_ds_shape_75 ''' + explain shape plan + WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item 
ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query76.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query76.groovy new file mode 100644 index 00000000000000..e4ab83d22d61ef --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query76.groovy @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query76") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_customer_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_customer_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_promo_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_promo_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + 
SELECT 'catalog' as channel, 'cs_bill_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_bill_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100""" + qt_ds_shape_76 ''' + explain shape plan + select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_customer_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_customer_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_promo_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_promo_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_bill_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_bill_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query77.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query77.groovy new file mode 100644 index 00000000000000..3e5fada6bb4aef --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query77.groovy @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query77") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + 
sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + 
group by rollup (channel, id) + order by channel + ,id + limit 100""" + qt_ds_shape_77 ''' + explain shape plan + with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and (cast('2000-08-10' as date) + interval 30 day) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-10' as date) + and 
(cast('2000-08-10' as date) + interval 30 day) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query78.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query78.groovy new file mode 100644 index 00000000000000..f8186f83278da2 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query78.groovy @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query78") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null and d_year=1998 + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null and d_year=1998 + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where 
sr_ticket_number is null and d_year=1998 + group by d_year, ss_item_sk, ss_customer_sk + ) +select +ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=1998 +order by + ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100""" + qt_ds_shape_78 ''' + explain shape plan + with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null and d_year=1998 + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null and d_year=1998 + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join 
store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null and d_year=1998 + group by d_year, ss_item_sk, ss_customer_sk + ) +select +ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=1998 +order by + ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query79.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query79.groovy new file mode 100644 index 00000000000000..2d43c4cda6db31 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query79.groovy @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query79") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 7 or household_demographics.hd_vehicle_count > -1) + and date_dim.d_dow = 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100""" + qt_ds_shape_79 ''' + explain shape plan 
+ select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 7 or household_demographics.hd_vehicle_count > -1) + and date_dim.d_dow = 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query8.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query8.groovy new file mode 100644 index 00000000000000..cbb6206e241d56 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query8.groovy @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query8") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + sql "set experimental_enable_virtual_slot_for_cse=true" + + def ds = """select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + 
'37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + 
'56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""" + qt_ds_shape_8 ''' + explain shape plan + select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + 
'27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + 
'49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query80.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query80.groovy new file mode 100644 index 00000000000000..84cd9a1c132196 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query80.groovy @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query80") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2002-08-14' as date) + and (cast('2002-08-14' as date) + interval 30 day) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer 
join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2002-08-14' as date) + and (cast('2002-08-14' as date) + interval 30 day) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2002-08-14' as date) + and (cast('2002-08-14' as date) + interval 30 day) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , concat('store', store_id) as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , concat('catalog_page', catalog_page_id) as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , concat('web_site', web_site_id) as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100""" + qt_ds_shape_80 ''' + explain shape plan + with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and 
ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2002-08-14' as date) + and (cast('2002-08-14' as date) + interval 30 day) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2002-08-14' as date) + and (cast('2002-08-14' as date) + interval 30 day) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2002-08-14' as date) + and (cast('2002-08-14' as date) + interval 30 day) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , concat('store', store_id) as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as 
channel + , concat('catalog_page', catalog_page_id) as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , concat('web_site', web_site_id) as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query81.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query81.groovy new file mode 100644 index 00000000000000..3e2dea3e09bdbe --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query81.groovy @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query81") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =2001 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'TN' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100""" + qt_ds_shape_81 ''' + explain shape plan + with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + 
sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =2001 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'TN' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query82.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query82.groovy new file mode 100644 index 00000000000000..65e93ce43d1912 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query82.groovy @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query82") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 58 and 58+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-13' as date) and (cast('2001-01-13' as date) + interval 60 day) + and i_manufact_id in (259,559,580,485) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100""" + qt_ds_shape_82 ''' + explain shape plan + select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 58 and 58+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-01-13' as date) and (cast('2001-01-13' as date) + interval 60 day) + and i_manufact_id in (259,559,580,485) + and 
inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query83.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query83.groovy new file mode 100644 index 00000000000000..70fbc4ce0acd7e --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query83.groovy @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query83") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-07-13','2001-09-10','2001-11-16'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-07-13','2001-09-10','2001-11-16'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-07-13','2001-09-10','2001-11-16'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + 
,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100""" + qt_ds_shape_83 ''' + explain shape plan + with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-07-13','2001-09-10','2001-11-16'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-07-13','2001-09-10','2001-11-16'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2001-07-13','2001-09-10','2001-11-16'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + 
and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query84.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query84.groovy new file mode 100644 index 00000000000000..399e82338c13ed --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query84.groovy @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query84") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select c_customer_id as customer_id + , concat(concat(coalesce(c_last_name,''), ','), coalesce(c_first_name,'')) as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Woodland' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 60306 + and ib_upper_bound <= 60306 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100""" + qt_ds_shape_84 ''' + explain shape plan + select c_customer_id as customer_id + , concat(concat(coalesce(c_last_name,''), ','), coalesce(c_first_name,'')) as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Woodland' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 60306 + and ib_upper_bound <= 60306 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query85.groovy 
b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query85.groovy new file mode 100644 index 00000000000000..6352b8d9bfb05d --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query85.groovy @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query85") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'College' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + 
and + ( + ( + ca_country = 'United States' + and + ca_state in ('NC', 'TX', 'IA') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'WV', 'GA') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('OK', 'VA', 'KY') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100""" + qt_ds_shape_85 ''' + explain shape plan + select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'College' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('NC', 'TX', 'IA') + and ws_net_profit between 100 and 200 + ) + or 
+ ( + ca_country = 'United States' + and + ca_state in ('WI', 'WV', 'GA') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('OK', 'VA', 'KY') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query86.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query86.groovy new file mode 100644 index 00000000000000..2200b2d71b836a --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query86.groovy @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query86") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1186 and 1186+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100""" + qt_ds_shape_86 ''' + explain shape plan + select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1186 and 1186+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 + ''' +} diff --git 
a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query87.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query87.groovy new file mode 100644 index 00000000000000..f7a38a963c5c75 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query87.groovy @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query87") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1202 and 1202+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1202 and 1202+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1202 and 1202+11) +) cool_cust +""" + qt_ds_shape_87 ''' + explain shape plan + select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1202 and 1202+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and 
catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1202 and 1202+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1202 and 1202+11) +) cool_cust + + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query88.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query88.groovy new file mode 100644 index 00000000000000..e0a2c83153fb68 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query88.groovy @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query88") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, 
store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and 
time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s8 +""" + qt_ds_shape_88 ''' + explain shape plan + select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and 
household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + 
(household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + 
from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or + (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) + and store.s_store_name = 'ese') s8 + + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query89.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query89.groovy new file mode 100644 index 00000000000000..c0ecd01ae44cb5 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query89.groovy @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query89") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2001) and + ((i_category in ('Books','Children','Electronics') and + i_class in ('history','school-uniforms','audio') + ) + or (i_category in ('Men','Sports','Shoes') and + i_class in ('pants','tennis','womens') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100""" + qt_ds_shape_89 ''' + explain shape plan + select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = 
s_store_sk and + d_year in (2001) and + ((i_category in ('Books','Children','Electronics') and + i_class in ('history','school-uniforms','audio') + ) + or (i_category in ('Men','Sports','Shoes') and + i_class in ('pants','tennis','womens') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query9.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query9.groovy new file mode 100644 index 00000000000000..fbd7549e41034c --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query9.groovy @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query9") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + sql "set enable_parallel_result_sink=false;" + + def ds = """select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 1071 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 39161 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 29434 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 6568 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 
21216 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +""" + qt_ds_shape_9 ''' + explain shape plan + select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 1071 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 39161 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 29434 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 6568 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 21216 + then (select avg(ss_ext_tax) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 + + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query90.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query90.groovy new file mode 
100644 index 00000000000000..26645b7924a9b2 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query90.groovy @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query90") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 
12 and 12+1 + and household_demographics.hd_dep_count = 6 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 6 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100""" + qt_ds_shape_90 ''' + explain shape plan + select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 12 and 12+1 + and household_demographics.hd_dep_count = 6 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 6 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query91.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query91.groovy new file mode 100644 index 00000000000000..3f72b439c1627d --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query91.groovy @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query91") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2000 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + 
or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""" + qt_ds_shape_91 ''' + explain shape plan + select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2000 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query92.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query92.groovy new file mode 100644 index 00000000000000..77553e938c10b8 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query92.groovy @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query92") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + sum(ws_ext_discount_amt) as "Excess Discount Amount" +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 714 +and i_item_sk = ws_item_sk +and d_date between '2000-02-01' and + (cast('2000-02-01' as date) + interval 90 day) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2000-02-01' and + (cast('2000-02-01' as date) + interval 90 day) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100""" + qt_ds_shape_92 ''' + explain shape plan + select + sum(ws_ext_discount_amt) as "Excess Discount Amount" +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 714 +and i_item_sk = ws_item_sk +and d_date between '2000-02-01' and + (cast('2000-02-01' as date) + interval 90 day) +and d_date_sk = 
ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2000-02-01' and + (cast('2000-02-01' as date) + interval 90 day) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query93.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query93.groovy new file mode 100644 index 00000000000000..bb5f53a01c895b --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query93.groovy @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query93") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 58') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100""" + qt_ds_shape_93 ''' + explain shape plan + select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 58') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query94.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query94.groovy new file mode 100644 
index 00000000000000..a3bfc5466a61c4 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query94.groovy @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query94") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + count(distinct ws_order_number) as "order count" + ,sum(ws_ext_ship_cost) as "total shipping cost" + ,sum(ws_net_profit) as "total net profit" +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-5-01' and + (cast('2002-5-01' as date) + interval 60 day) +and ws1.ws_ship_date_sk = d_date_sk +and 
ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'OK' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100""" + qt_ds_shape_94 ''' + explain shape plan + select + count(distinct ws_order_number) as "order count" + ,sum(ws_ext_ship_cost) as "total shipping cost" + ,sum(ws_net_profit) as "total net profit" +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-5-01' and + (cast('2002-5-01' as date) + interval 60 day) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'OK' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query95.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query95.groovy new file mode 100644 index 00000000000000..dbb8d1424f52a7 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query95.groovy @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query95") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as "order count" + ,sum(ws_ext_ship_cost) as "total shipping cost" + ,sum(ws_net_profit) as "total net profit" +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + interval 60 day) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'VA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = 
ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""" + qt_ds_shape_95 ''' + explain shape plan + with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as "order count" + ,sum(ws_ext_ship_cost) as "total shipping cost" + ,sum(ws_net_profit) as "total net profit" +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + interval 60 day) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'VA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query96.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query96.groovy new file mode 100644 index 00000000000000..925d7866adc8d5 --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query96.groovy @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query96") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 0 + and store.s_store_name = 'ese' +order by count(*) +limit 100""" + qt_ds_shape_96 ''' + explain shape plan + select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 0 + and store.s_store_name = 'ese' +order by count(*) +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query97.groovy 
b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query97.groovy new file mode 100644 index 00000000000000..c799759663577b --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query97.groovy @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query97") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + multi_sql """ + use ${db}; + set enable_nereids_planner=true; + set enable_nereids_distribute_planner=false; + set enable_fallback_to_original_planner=false; + set exec_mem_limit=21G; + set be_number_for_test=3; + set enable_runtime_filter_prune=false; + set parallel_pipeline_task_num=8; + set forbid_unknown_col_stats=false; + set enable_stats=true; + set runtime_filter_type=8; + set broadcast_row_count_limit = 30000000; + set enable_nereids_timeout = false; + set enable_pipeline_engine = true; + set disable_nereids_rules='PRUNE_EMPTY_PARTITION'; + set push_topn_to_agg = true; + set topn_opt_limit_threshold=1024; + """ + + def ds = """with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1199 and 1199 + 11 and ss_sold_date_sk IS NOT NULL +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1199 and 1199 + 11 and cs_sold_date_sk IS NOT NULL +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100""" + qt_ds_shape_97 ''' + explain shape plan + with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1199 and 1199 + 11 and ss_sold_date_sk IS NOT NULL +group by ss_customer_sk + 
,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1199 and 1199 + 11 and cs_sold_date_sk IS NOT NULL +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query98.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query98.groovy new file mode 100644 index 00000000000000..36b2d5cb45bc1d --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query98.groovy @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query98") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Men', 'Sports', 'Jewelry') + and ss_sold_date_sk = d_date_sk + and d_date between cast('1999-02-05' as date) + and (cast('1999-02-05' as date) + interval 30 day) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio""" + qt_ds_shape_98 ''' + explain shape plan + select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Men', 'Sports', 'Jewelry') + and ss_sold_date_sk = d_date_sk + and d_date between cast('1999-02-05' as date) + and (cast('1999-02-05' as date) + interval 30 day) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + 
,revenueratio + ''' +} diff --git a/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query99.groovy b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query99.groovy new file mode 100644 index 00000000000000..5377c23acb8ccd --- /dev/null +++ b/regression-test/suites/shape_check/tpcds_sf1000_nopkfk/shape/query99.groovy @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query99") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + if (isCloudMode()) { + return + } + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" + + def ds = """select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as ">120 days" +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1194 and 1194 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100""" + qt_ds_shape_99 ''' + explain shape plan + select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" + ,sum(case when 
(cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as ">120 days" +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1194 and 1194 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 + ''' +} From 5b067219c02ee11b37a7c76eb6b7d18a96a056a9 Mon Sep 17 00:00:00 2001 From: englefly Date: Tue, 6 Jan 2026 18:30:18 +0800 Subject: [PATCH 03/21] =?UTF-8?q?mode=3D1=20=E6=97=B6=20=E5=8D=B3=E4=BD=BF?= =?UTF-8?q?=E6=B2=A1=E6=9C=89=E7=BB=8F=E8=BF=87big=20join=20=E4=B9=9F?= =?UTF-8?q?=E8=A6=81=20=E5=BC=BA=E5=88=B6=20=E4=B8=8B=E6=8E=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../eageraggregation/EagerAggRewriter.java | 22 ++++++++++--------- .../eageraggregation/PushDownAggregation.java | 3 +-- .../eageraggregation/SumAggWriter.java | 7 ++---- .../org/apache/doris/qe/SessionVariable.java | 11 +++++++++- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java index f7ed45777f4e6c..9293e962e31221 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -36,7 +36,6 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.nereids.util.ExpressionUtils; -import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Statistics; @@ -119,7 +118,8 @@ public Plan visitLogicalJoin(LogicalJoin join, P if (stats == null) { stats = join.right().accept(derive, new StatsDerive.DeriveContext()); } - if (stats.getRowCount() > PushDownAggContext.BIG_JOIN_BUILD_SIZE) { + if (stats.getRowCount() > PushDownAggContext.BIG_JOIN_BUILD_SIZE + || SessionVariable.getEagerAggregationMode() > 0) { childContext = childContext.passThroughBigJoin(); } if (toLeft) { @@ -308,19 +308,21 @@ private Plan genAggregate(Plan child, PushDownAggContext context) { } private boolean checkStats(Plan plan, PushDownAggContext context) { - if (!context.isPassThroughBigJoin()) { - return false; - } - if (ConnectContext.get() == null) { - return false; - } - int mode = ConnectContext.get().getSessionVariable().eagerAggregationMode; + int mode = SessionVariable.getEagerAggregationMode(); if (mode < 0) { return false; } + if (mode > 0) { - return true; + // when mode=1, any join is regarded as big join in order to + // push down aggregation through at least one join + return context.isPassThroughBigJoin(); + } + + if (!context.isPassThroughBigJoin()) { + return false; } + Statistics stats = plan.getStats(); if (stats == null) { stats = plan.accept(derive, new StatsDerive.DeriveContext()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java index 221b234117ac4a..bc3393bbf49de7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -60,7 +60,6 @@ import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.util.ExpressionUtils; -import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import com.google.common.collect.Sets; @@ -97,7 +96,7 @@ public class PushDownAggregation extends DefaultPlanRewriter impleme @Override public Plan rewriteRoot(Plan plan, JobContext jobContext) { - int mode = ConnectContext.get().getSessionVariable().eagerAggregationMode; + int mode = SessionVariable.getEagerAggregationMode(); if (mode < 0) { return plan; } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java index 27a2165f42233a..5b87a482279eef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java @@ -35,7 +35,7 @@ import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.util.ExpressionUtils; -import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Statistics; @@ -228,10 +228,7 @@ private Plan genAggregate(Plan child, SumAggContext context) { } private boolean checkStats(Plan plan, 
SumAggContext context) { - if (ConnectContext.get() == null) { - return false; - } - int mode = ConnectContext.get().getSessionVariable().eagerAggregationMode; + int mode = SessionVariable.getEagerAggregationMode(); if (mode < 0) { return false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 38f9e252f26d5d..0fe871e864709a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2191,7 +2191,16 @@ public boolean isEnableHboNonStrictMatchingMode() { + "1: force eager aggregation, " + "-1: Prohibit eager aggregation "} ) - public int eagerAggregationMode = 0; + private int eagerAggregationMode = 0; + + public static int getEagerAggregationMode() { + if (ConnectContext.get() != null) { + return ConnectContext.get().getSessionVariable().eagerAggregationMode; + } else { + return VariableMgr.getDefaultSessionVariable().eagerAggregationMode; + } + } + @VariableMgr.VarAttr( name = ENABLE_PAGE_CACHE, From 123f245a8feda412ad2debea7d4506cafc30b0e6 Mon Sep 17 00:00:00 2001 From: englefly Date: Tue, 6 Jan 2026 20:19:29 +0800 Subject: [PATCH 04/21] do not support avg/count --- .../eageraggregation/PushDownAggregation.java | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java index bc3393bbf49de7..d20877aee1647e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -83,8 +83,6 @@ public class PushDownAggregation extends DefaultPlanRewriter impleme private 
final Set pushDownAggFunctionSet = Sets.newHashSet( Sum.class, - Count.class, - Avg.class, Max.class, Min.class); @@ -118,28 +116,10 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte List aggFunctions = new ArrayList<>(); - Map avgToSumCountMap = new HashMap<>(); for (AggregateFunction aggFunction : agg.getAggregateFunctions()) { if (pushDownAggFunctionSet.contains(aggFunction.getClass()) - && !aggFunction.isDistinct() - && (!(aggFunction instanceof Count) || (!((Count) aggFunction).isCountStar()))) { - if (aggFunction instanceof Avg) { - DataType targetType = aggFunction.getDataType(); - Sum sum = new Sum(aggFunction.child(0)); - Count count = new Count(aggFunction.child(0)); - if (!aggFunctions.contains(sum)) { - aggFunctions.add(sum); - } - if (!aggFunctions.contains(count)) { - aggFunctions.add(count); - } - Expression castSum = targetType.equals(sum.getDataType()) ? sum : new Cast(sum, targetType); - Expression castCount = targetType.equals(count.getDataType()) ? count : new Cast(count, targetType); - avgToSumCountMap.put((Avg) aggFunction, - new Divide(castSum, castCount)); - } else { - aggFunctions.add(aggFunction); - } + && !aggFunction.isDistinct()) { + aggFunctions.add(aggFunction); } else { return agg; } @@ -183,8 +163,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte if (ne instanceof SlotReference) { newOutputExpressions.add(ne); } else { - Expression rewriteAvgExpr = ExpressionUtils.replace(ne, avgToSumCountMap); - NamedExpression replaceAliasExpr = (NamedExpression) rewriteAvgExpr + NamedExpression replaceAliasExpr = (NamedExpression) ne .rewriteDownShortCircuit(e -> { Alias alias = pushDownContext.getAliasMap().get(e); if (alias != null) { From d7fe0c0e44aeadf8494089909d69d3b8b2986fb1 Mon Sep 17 00:00:00 2001 From: englefly Date: Tue, 6 Jan 2026 20:31:30 +0800 Subject: [PATCH 05/21] group key only slotreference --- .../eageraggregation/EagerAggRewriter.java | 24 ++++++------------- 
.../eageraggregation/PushDownAggContext.java | 10 ++++---- .../eageraggregation/PushDownAggregation.java | 2 +- .../trees/plans/logical/LogicalProject.java | 16 +++++++------ 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java index 9293e962e31221..dc0816e70c73c8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -20,7 +20,6 @@ import org.apache.doris.nereids.rules.analysis.NormalizeAggregate; import org.apache.doris.nereids.rules.rewrite.StatsDerive; import org.apache.doris.nereids.stats.ExpressionEstimation; -// import org.apache.doris.nereids.stats.StatsCalculator; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -90,17 +89,17 @@ public Plan visitLogicalJoin(LogicalJoin join, P } List joinConditionSlots; - List childGroupByKeys = new ArrayList<>(); + List childGroupByKeys = new ArrayList<>(); if (toLeft) { joinConditionSlots = getJoinConditionsInputSlotsFromOneSide(join, join.left()); - for (NamedExpression key : context.getGroupKeys()) { + for (SlotReference key : context.getGroupKeys()) { if (join.left().getOutputSet().containsAll(key.getInputSlots())) { childGroupByKeys.add(key); } } } else { joinConditionSlots = getJoinConditionsInputSlotsFromOneSide(join, join.right()); - for (NamedExpression key : context.getGroupKeys()) { + for (SlotReference key : context.getGroupKeys()) { if (join.right().getOutputSet().containsAll(key.getInputSlots())) { childGroupByKeys.add(key); } @@ -160,11 +159,9 @@ private List 
getJoinConditionsInputSlotsFromOneSide(LogicalJoin project, PushDownAggContext context) { - HashMap replaceMapAliasBody = new HashMap<>(); HashMap replaceMapAlias = new HashMap<>(); for (NamedExpression ne : project.getProjects()) { if (ne instanceof Alias) { - replaceMapAliasBody.put(ne.toSlot(), ((Alias) ne).child()); replaceMapAlias.put(ne.toSlot(), ne); } } @@ -175,22 +172,15 @@ private PushDownAggContext createContextFromProject(LogicalProject groupKeys = new ArrayList<>(); - for (NamedExpression key : context.getGroupKeys()) { - NamedExpression newKey; - if (key instanceof Alias) { - newKey = (Alias) ExpressionUtils.replace(key, replaceMapAliasBody); - } else { - // key is slot - newKey = (NamedExpression) replaceMapAlias.getOrDefault(key, key); - } - groupKeys.add(newKey); + List groupKeys = new ArrayList<>(); + for (SlotReference key : context.getGroupKeys()) { + groupKeys.add((SlotReference) project.pushDownExpressionPastProject(key)); } List aggFunctions = new ArrayList<>(); Map aliasMap = new HashMap<>(); for (AggregateFunction aggFunc : context.getAggFunctions()) { - AggregateFunction newAggFunc = (AggregateFunction) ExpressionUtils.replace(aggFunc, replaceMapAliasBody); + AggregateFunction newAggFunc = (AggregateFunction) project.pushDownExpressionPastProject(aggFunc); Alias alias = context.getAliasMap().get(aggFunc); aliasMap.put(newAggFunc, (Alias) alias.withChildren(newAggFunc)); aggFunctions.add(newAggFunc); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java index e9475bf7ac1d4c..23c6d9aea4d56a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java @@ -39,7 +39,7 @@ public class PushDownAggContext { public 
static final int BIG_JOIN_BUILD_SIZE = 400_000; private final List aggFunctions; - private final List groupKeys; + private final List groupKeys; private final Map aliasMap; private final Set aggFunctionsInputSlots; @@ -55,7 +55,7 @@ public class PushDownAggContext { * constructor */ public PushDownAggContext(List aggFunctions, - List groupKeys, + List groupKeys, CascadesContext cascadesContext) { this(aggFunctions, groupKeys, null, cascadesContext, false); } @@ -64,7 +64,7 @@ public PushDownAggContext(List aggFunctions, * constructor */ public PushDownAggContext(List aggFunctions, - List groupKeys, Map aliasMap, CascadesContext cascadesContext, + List groupKeys, Map aliasMap, CascadesContext cascadesContext, boolean passThroughBigJoin) { this.groupKeys = groupKeys; this.aggFunctions = ImmutableList.copyOf(aggFunctions); @@ -100,11 +100,11 @@ public List getAggFunctions() { return aggFunctions; } - public List getGroupKeys() { + public List getGroupKeys() { return groupKeys; } - public PushDownAggContext withGroupKeys(List groupKeys) { + public PushDownAggContext withGroupKeys(List groupKeys) { return new PushDownAggContext(aggFunctions, groupKeys, aliasMap, cascadesContext, passThroughBigJoin); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java index d20877aee1647e..e504cbc2a5ca80 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -129,7 +129,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte return agg; } - List groupKeys = new ArrayList<>(); + List groupKeys = new ArrayList<>(); for (Expression groupKey : agg.getGroupByExpressions()) { if (groupKey instanceof SlotReference) { 
groupKeys.add((SlotReference) groupKey); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java index fef5e46f2c07a7..68e8f8623f9739 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java @@ -63,6 +63,8 @@ public class LogicalProject extends LogicalUnary> projectsSet; private final boolean isDistinct; + private final HashMap projectMap; + public LogicalProject(List projects, CHILD_TYPE child) { this(projects, false, ImmutableList.of(child)); } @@ -90,6 +92,13 @@ private LogicalProject(List projects, boolean isDistinct, : projects; this.projectsSet = Suppliers.memoize(() -> Utils.fastToImmutableSet(this.projects)); this.isDistinct = isDistinct; + this.projectMap = new HashMap<>(); + for (NamedExpression namedExpression : projects) { + if (namedExpression instanceof Alias) { + Alias alias = (Alias) namedExpression; + projectMap.put(alias.toSlot(), alias.child()); + } + } } /** @@ -315,13 +324,6 @@ public void computeFd(DataTrait.Builder builder) { * */ public Expression pushDownExpressionPastProject(Expression expression) { - HashMap projectMap = new HashMap(); - for (NamedExpression namedExpression : projects) { - if (namedExpression instanceof Alias) { - Alias alias = (Alias) namedExpression; - projectMap.put(alias.toSlot(), alias.child()); - } - } return ExpressionUtils.replace(expression, projectMap); } } From 1589228d0db78c2452b0fe6e68bd53cf84f24d65 Mon Sep 17 00:00:00 2001 From: englefly Date: Wed, 7 Jan 2026 11:04:14 +0800 Subject: [PATCH 06/21] push agg on join --- .../eageraggregation/EagerAggRewriter.java | 51 +- .../eageraggregation/PushDownAggContext.java | 1 - .../eageraggregation/PushDownAggregation.java | 16 +- .../org/apache/doris/qe/SessionVariable.java | 9 + 
.../data/nereids_p0/eager_agg/eager_agg.out | 144 + .../tpcds_sf1000_nopkfk/shape/query37.out | 25 +- .../tpcds_sf1000_nopkfk/shape/query82.out | 25 +- .../nereids_p0/eager_agg/eager_agg.groovy | 108 + .../suites/nereids_p0/eager_agg/load.groovy | 213 + .../shape_check/tpcds_sf1000/load.groovy | 5080 ++++++++--------- 10 files changed, 3083 insertions(+), 2589 deletions(-) create mode 100644 regression-test/data/nereids_p0/eager_agg/eager_agg.out create mode 100644 regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy create mode 100644 regression-test/suites/nereids_p0/eager_agg/load.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java index dc0816e70c73c8..db3c85e030e403 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -34,7 +34,6 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; -import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Statistics; @@ -71,19 +70,41 @@ public class EagerAggRewriter extends DefaultPlanRewriter { @Override public Plan visitLogicalJoin(LogicalJoin join, PushDownAggContext context) { - List pushToLeft = new ArrayList<>(); - List pushToRight = new ArrayList<>(); - boolean toLeft = true; - boolean toRight = true; - for (AggregateFunction aggFunc : context.getAggFunctions()) { - if (join.left().getOutputSet().containsAll(aggFunc.getInputSlots())) { - pushToLeft.add(aggFunc); - toRight = false; - } 
else if (join.right().getOutputSet().containsAll(aggFunc.getInputSlots())) { - pushToRight.add(aggFunc); - toLeft = false; + boolean toLeft = false; + boolean toRight = false; + boolean pushHere = false; + if (context.getAggFunctions().isEmpty()) { + // example: select x from T group by x + // if no agg function, try to push to large child + Statistics leftStats = join.left().getStats(); + if (leftStats == null) { + leftStats = join.left().accept(derive, new StatsDerive.DeriveContext()); + } + Statistics rightStats = join.right().getStats(); + if (rightStats == null) { + rightStats = join.right().accept(derive, new StatsDerive.DeriveContext()); + } + if (leftStats.getRowCount() > rightStats.getRowCount()) { + toLeft = true; + } else { + toRight = true; } - if (toLeft == toRight) { + } else { + for (AggregateFunction aggFunc : context.getAggFunctions()) { + if (join.left().getOutputSet().containsAll(aggFunc.getInputSlots())) { + toLeft = true; + } else if (join.right().getOutputSet().containsAll(aggFunc.getInputSlots())) { + toRight = true; + } else { + pushHere = true; + } + } + } + + if (pushHere || (toLeft && toRight)) { + if (SessionVariable.isEagerAggregationOnJoin()) { + return genAggregate(join, context); + } else { return join; } } @@ -174,7 +195,9 @@ private PushDownAggContext createContextFromProject(LogicalProject groupKeys = new ArrayList<>(); for (SlotReference key : context.getGroupKeys()) { - groupKeys.add((SlotReference) project.pushDownExpressionPastProject(key)); + groupKeys.addAll( + project.pushDownExpressionPastProject(key).getInputSlots() + .stream().map(slot -> (SlotReference) slot).collect(Collectors.toList())); } List aggFunctions = new ArrayList<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java index 23c6d9aea4d56a..f61d06e6519374 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java @@ -19,7 +19,6 @@ import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.trees.expressions.Alias; -import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java index e504cbc2a5ca80..f38b634ff40ec4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -38,14 +38,10 @@ import org.apache.doris.nereids.rules.analysis.NormalizeAggregate; import org.apache.doris.nereids.rules.rewrite.AdjustNullable; import org.apache.doris.nereids.trees.expressions.Alias; -import org.apache.doris.nereids.trees.expressions.Cast; -import org.apache.doris.nereids.trees.expressions.Divide; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; -import org.apache.doris.nereids.trees.expressions.functions.agg.Avg; -import org.apache.doris.nereids.trees.expressions.functions.agg.Count; import org.apache.doris.nereids.trees.expressions.functions.agg.Max; import org.apache.doris.nereids.trees.expressions.functions.agg.Min; import 
org.apache.doris.nereids.trees.expressions.functions.agg.Sum; @@ -58,8 +54,6 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; -import org.apache.doris.nereids.types.DataType; -import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.qe.SessionVariable; import com.google.common.collect.Sets; @@ -67,9 +61,7 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Set; @@ -145,7 +137,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte PushDownAggContext pushDownContext = new PushDownAggContext(new ArrayList<>(aggFunctions), groupKeys, context.getCascadesContext()); - try { + //try { Plan child = agg.child().accept(writer, pushDownContext); if (child != agg.child()) { // agg has been pushed down, rewrite agg output expressions @@ -184,9 +176,9 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte AdjustNullable adjustNullable = new AdjustNullable(false, false); return adjustNullable.rewriteRoot(normalized, null); } - } catch (RuntimeException e) { - LOG.info("PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString()); - } + //} catch (RuntimeException e) { + // LOG.info("PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString()); + //} return agg; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 0fe871e864709a..373b92daa39991 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2201,6 +2201,15 @@ public static int getEagerAggregationMode() { } } + @VariableMgr.VarAttr(name = 
"eager_aggregation_on_join", needForward = true) + public boolean eagerAggregationOnJoin = false; + public static boolean isEagerAggregationOnJoin() { + if (ConnectContext.get() != null) { + return ConnectContext.get().getSessionVariable().eagerAggregationOnJoin; + } else { + return VariableMgr.getDefaultSessionVariable().eagerAggregationOnJoin; + } + } @VariableMgr.VarAttr( name = ENABLE_PAGE_CACHE, diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out new file mode 100644 index 00000000000000..ad2c07d5f1da2f --- /dev/null +++ b/regression-test/data/nereids_p0/eager_agg/eager_agg.out @@ -0,0 +1,144 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !a -- +PhysicalResultSink +--hashAgg[GLOBAL] +----PhysicalDistribute[DistributionSpecHash] +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=() +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalOlapScan[store_sales(ss)] +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_sales(ws)] +------------PhysicalProject +--------------PhysicalOlapScan[date_dim(dt)] + +Hint log: +Used: leading({ ss ws } dt ) +UnUsed: +SyntaxError: + +-- !a2 -- +PhysicalResultSink +--hashAgg[GLOBAL] +----PhysicalDistribute[DistributionSpecHash] +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=() +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] 
+----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalOlapScan[store_sales(ss)] +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_sales(ws)] +------------PhysicalProject +--------------PhysicalOlapScan[date_dim(dt)] + +Hint log: +Used: leading({ ss ws } dt ) +UnUsed: +SyntaxError: + +-- !sum_min_max -- +PhysicalResultSink +--hashAgg[GLOBAL] +----PhysicalDistribute[DistributionSpecHash] +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=() +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() +----------------------PhysicalProject +------------------------PhysicalOlapScan[store_sales(ss)] +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_sales(ws)] +------------PhysicalProject +--------------PhysicalOlapScan[date_dim(dt)] + +Hint log: +Used: leading({ ss ws } dt ) +UnUsed: +SyntaxError: + +-- !avg_count -- +PhysicalResultSink +--hashAgg[GLOBAL] +----PhysicalDistribute[DistributionSpecHash] +------hashAgg[LOCAL] +--------PhysicalProject +----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=() +------------PhysicalProject +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() +----------------PhysicalProject +------------------PhysicalOlapScan[store_sales(ss)] +----------------PhysicalProject +------------------PhysicalOlapScan[web_sales(ws)] +------------PhysicalProject 
+--------------PhysicalOlapScan[date_dim(dt)] + +Hint log: +Used: leading({ ss ws } dt ) +UnUsed: +SyntaxError: + +-- !groupkey_push_SS_JOIN_D -- +PhysicalResultSink +--PhysicalProject +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales(ss)] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[date_dim(dt)] +--------------PhysicalProject +----------------PhysicalOlapScan[web_sales(ws)] + +Hint log: +Used: leading({ ss dt } ws ) +UnUsed: +SyntaxError: + +-- !groupkey_push -- +PhysicalResultSink +--PhysicalProject +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() +--------------PhysicalProject +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=() +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales(ss)] +------------------PhysicalProject +--------------------PhysicalOlapScan[date_dim(dt)] +--------------PhysicalProject +----------------PhysicalOlapScan[web_sales(ws)] + +Hint log: +Used: leading({ ss dt } ws ) +UnUsed: +SyntaxError: + diff --git 
a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out index 149a61f7b37054..be81f11399d997 100644 --- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query37.out @@ -8,20 +8,23 @@ PhysicalResultSink ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk] ------------------PhysicalProject ---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk] +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk] ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk] ---------------------------PhysicalProject -----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100)) -------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 +------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100)) +--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2 +----------------------PhysicalProject 
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 --------------------------PhysicalProject ----------------------------filter((item.i_current_price <= 59.00) and (item.i_current_price >= 29.00) and i_manufact_id IN (705, 742, 777, 944)) ------------------------------PhysicalOlapScan[item] -----------------------PhysicalProject -------------------------filter((date_dim.d_date <= '2002-05-28') and (date_dim.d_date >= '2002-03-29')) ---------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------filter((date_dim.d_date <= '2002-05-28') and (date_dim.d_date >= '2002-03-29')) +----------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out index a1bb3a33e1d0ff..c7c9bfe0d57fe6 100644 --- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out +++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query82.out @@ -8,20 +8,23 @@ PhysicalResultSink ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk] ------------------PhysicalProject 
---------------------PhysicalOlapScan[store_sales] apply RFs: RF2 -------------------PhysicalProject ---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk] +--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk] ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk] ---------------------------PhysicalProject -----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100)) -------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 +------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100)) +--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2 +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +--------------------------hashAgg[GLOBAL] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashAgg[LOCAL] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 --------------------------PhysicalProject ----------------------------filter((item.i_current_price <= 88.00) and (item.i_current_price >= 58.00) and i_manufact_id IN (259, 485, 559, 580)) ------------------------------PhysicalOlapScan[item] -----------------------PhysicalProject -------------------------filter((date_dim.d_date <= '2001-03-14') and (date_dim.d_date >= '2001-01-13')) 
---------------------------PhysicalOlapScan[date_dim] +------------------PhysicalProject +--------------------filter((date_dim.d_date <= '2001-03-14') and (date_dim.d_date >= '2001-01-13')) +----------------------PhysicalOlapScan[date_dim] diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy new file mode 100644 index 00000000000000..fe829bb86a121c --- /dev/null +++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("eager_agg") { + sql """ + set eager_aggregation_mode=1; + set eager_aggregation_on_join=true; + """ + + // push to ss-join-ws + qt_a """ + explain shape plan + select /*+leading({ss ws} dt)*/ dt.d_year + ,sum(ws_list_price) brand + ,sum(ss_sales_price) sum_agg + from date_dim dt + ,store_sales ss + ,web_sales ws + where dt.d_date_sk = ss_sold_date_sk + and ss_item_sk = ws_item_sk + group by dt.d_year + """ + + // push to ss-join-ws + qt_a2 """ + explain shape plan + select /*+leading({ss ws} dt)*/ dt.d_year + ,sum(ws_list_price + ss_sales_price) brand + + from date_dim dt + ,store_sales ss + ,web_sales ws + where dt.d_date_sk = ss_sold_date_sk + and ss_item_sk = ws_item_sk + group by dt.d_year + """ + + // push sum/min/max aggFunc + qt_sum_min_max """ + explain shape plan + select /*+leading({ss ws} dt)*/ dt.d_year + ,sum(ws_list_price) brand + ,min(ss_sales_price) min_agg + ,max(ss_sales_price) max_agg + from date_dim dt + ,store_sales ss + ,web_sales ws + where dt.d_date_sk = ss_sold_date_sk + and ss_item_sk = ws_item_sk + group by dt.d_year + """ + + + // do not push avg/count aggFunc + qt_avg_count """ + explain shape plan + select /*+leading({ss ws} dt)*/ dt.d_year + ,avg(ws_list_price) + from date_dim dt + ,store_sales ss + ,web_sales ws + where dt.d_date_sk = ss_sold_date_sk + and ss_item_sk = ws_item_sk + group by dt.d_year + """ + + // agg push to ss-d + qt_groupkey_push_SS_JOIN_D """ + explain shape plan + select /*+leading({ss dt} ws)*/ dt.d_year + ,sum(ss_wholesale_cost) brand + ,sum(ss_sales_price + d_moy) sum_agg + from store_sales ss + join date_dim dt + join web_sales ws + where dt.d_date_sk = ss_sold_date_sk + and ss_item_sk = ws_item_sk + group by dt.d_year, ss_hdemo_sk + ws_quantity + """ + + // group key: ss_hdemo_sk + d_moy => push to ss-d + qt_groupkey_push """ + explain shape plan + select /*+leading({ss dt} ws)*/ dt.d_year + ,sum(ss_wholesale_cost) brand + ,sum(ss_sales_price) sum_agg + from store_sales ss + join date_dim dt + join 
web_sales ws + where dt.d_date_sk = ss_sold_date_sk + and ss_item_sk = ws_item_sk + group by dt.d_year, ss_hdemo_sk + d_moy + """ +} diff --git a/regression-test/suites/nereids_p0/eager_agg/load.groovy b/regression-test/suites/nereids_p0/eager_agg/load.groovy new file mode 100644 index 00000000000000..2cb27d49d642da --- /dev/null +++ b/regression-test/suites/nereids_p0/eager_agg/load.groovy @@ -0,0 +1,213 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("load") { + sql """ + drop table if exists store_sales; + drop table if exists date_dim; + drop table if exists web_sales; + + CREATE TABLE `store_sales` ( + `ss_sold_date_sk` bigint NULL, + `ss_sold_time_sk` bigint NULL, + `ss_item_sk` bigint NULL, + `ss_customer_sk` bigint NULL, + `ss_cdemo_sk` bigint NULL, + `ss_hdemo_sk` bigint NULL, + `ss_addr_sk` bigint NULL, + `ss_store_sk` bigint NULL, + `ss_promo_sk` bigint NULL, + `ss_ticket_number` bigint NULL, + `ss_quantity` int NULL, + `ss_wholesale_cost` decimal(7,2) NULL, + `ss_list_price` decimal(7,2) NULL, + `ss_sales_price` decimal(7,2) NULL, + `ss_ext_discount_amt` decimal(7,2) NULL, + `ss_ext_sales_price` decimal(7,2) NULL, + `ss_ext_wholesale_cost` decimal(7,2) NULL, + `ss_ext_list_price` decimal(7,2) NULL, + `ss_ext_tax` decimal(7,2) NULL, + `ss_coupon_amt` decimal(7,2) NULL, + `ss_net_paid` decimal(7,2) NULL, + `ss_net_paid_inc_tax` decimal(7,2) NULL, + `ss_net_profit` decimal(7,2) NULL +) ENGINE=OLAP +DUPLICATE KEY(`ss_sold_date_sk`, `ss_sold_time_sk`, `ss_item_sk`, `ss_customer_sk`) +DISTRIBUTED BY HASH(`ss_customer_sk`) BUCKETS 3 +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1", +"min_load_replica_num" = "-1", +"is_being_synced" = "false", +"storage_medium" = "hdd", +"storage_format" = "V2", +"inverted_index_storage_format" = "V3", +"light_schema_change" = "true", +"disable_auto_compaction" = "false", +"enable_single_replica_compaction" = "false", +"group_commit_interval_ms" = "10000", +"group_commit_data_bytes" = "134217728" +); + +CREATE TABLE `date_dim` ( + `d_date_sk` bigint NULL, + `d_date_id` char(16) NULL, + `d_date` date NULL, + `d_month_seq` int NULL, + `d_week_seq` int NULL, + `d_quarter_seq` int NULL, + `d_year` int NULL, + `d_dow` int NULL, + `d_moy` int NULL, + `d_dom` int NULL, + `d_qoy` int NULL, + `d_fy_year` int NULL, + `d_fy_quarter_seq` int NULL, + `d_fy_week_seq` int NULL, + `d_day_name` char(9) NULL, + `d_quarter_name` char(6) NULL, + `d_holiday` char(1) 
NULL, + `d_weekend` char(1) NULL, + `d_following_holiday` char(1) NULL, + `d_first_dom` int NULL, + `d_last_dom` int NULL, + `d_same_day_ly` int NULL, + `d_same_day_lq` int NULL, + `d_current_day` char(1) NULL, + `d_current_week` char(1) NULL, + `d_current_month` char(1) NULL, + `d_current_quarter` char(1) NULL, + `d_current_year` char(1) NULL +) ENGINE=OLAP +DUPLICATE KEY(`d_date_sk`, `d_date_id`) +DISTRIBUTED BY HASH(`d_date_id`) BUCKETS 3 +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1", +"min_load_replica_num" = "-1", +"is_being_synced" = "false", +"storage_medium" = "hdd", +"storage_format" = "V2", +"inverted_index_storage_format" = "V3", +"light_schema_change" = "true", +"disable_auto_compaction" = "false", +"enable_single_replica_compaction" = "false", +"group_commit_interval_ms" = "10000", +"group_commit_data_bytes" = "134217728" +); + +CREATE TABLE `web_sales` ( + `ws_sold_date_sk` bigint NULL, + `ws_sold_time_sk` bigint NULL, + `ws_ship_date_sk` bigint NULL, + `ws_item_sk` bigint NULL, + `ws_bill_customer_sk` bigint NULL, + `ws_bill_cdemo_sk` bigint NULL, + `ws_bill_hdemo_sk` bigint NULL, + `ws_bill_addr_sk` bigint NULL, + `ws_ship_customer_sk` bigint NULL, + `ws_ship_cdemo_sk` bigint NULL, + `ws_ship_hdemo_sk` bigint NULL, + `ws_ship_addr_sk` bigint NULL, + `ws_web_page_sk` bigint NULL, + `ws_web_site_sk` bigint NULL, + `ws_ship_mode_sk` bigint NULL, + `ws_warehouse_sk` bigint NULL, + `ws_promo_sk` bigint NULL, + `ws_order_number` bigint NULL, + `ws_quantity` int NULL, + `ws_wholesale_cost` decimal(7,2) NULL, + `ws_list_price` decimal(7,2) NULL, + `ws_sales_price` decimal(7,2) NULL, + `ws_ext_discount_amt` decimal(7,2) NULL, + `ws_ext_sales_price` decimal(7,2) NULL, + `ws_ext_wholesale_cost` decimal(7,2) NULL, + `ws_ext_list_price` decimal(7,2) NULL, + `ws_ext_tax` decimal(7,2) NULL, + `ws_coupon_amt` decimal(7,2) NULL, + `ws_ext_ship_cost` decimal(7,2) NULL, + `ws_net_paid` decimal(7,2) NULL, + `ws_net_paid_inc_tax` decimal(7,2) NULL, 
+ `ws_net_paid_inc_ship` decimal(7,2) NULL, + `ws_net_paid_inc_ship_tax` decimal(7,2) NULL, + `ws_net_profit` decimal(7,2) NULL +) ENGINE=OLAP +DUPLICATE KEY(`ws_sold_date_sk`, `ws_sold_time_sk`, `ws_ship_date_sk`, `ws_item_sk`) +DISTRIBUTED BY HASH(`ws_item_sk`) BUCKETS 3 +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1", +"min_load_replica_num" = "-1", +"is_being_synced" = "false", +"storage_medium" = "hdd", +"storage_format" = "V2", +"inverted_index_storage_format" = "V3", +"light_schema_change" = "true", +"disable_auto_compaction" = "false", +"enable_single_replica_compaction" = "false", +"group_commit_interval_ms" = "10000", +"group_commit_data_bytes" = "134217728" +); + +INSERT INTO store_sales ( + ss_sold_date_sk, ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk, ss_hdemo_sk, + ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity, + ss_wholesale_cost, ss_list_price, ss_sales_price, ss_ext_discount_amt, + ss_ext_sales_price, ss_ext_wholesale_cost, ss_ext_list_price, ss_ext_tax, + ss_coupon_amt, ss_net_paid, ss_net_paid_inc_tax, ss_net_profit +) VALUES ( + 20240101, 36000, 1001, 501, 601, 701, + 801, 901, 10001, 55500001, 2, + 10.00, 12.00, 11.00, 2.00, + 22.00, 20.00, 24.00, 1.54, + 0.00, 22.00, 23.54, 3.54 +); + +INSERT INTO date_dim ( + d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq, d_quarter_seq, d_year, + d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq, d_fy_week_seq, + d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday, + d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq, + d_current_day, d_current_week, d_current_month, d_current_quarter, d_current_year +) VALUES ( + 20240101, '2024-01-01', '2024-01-01', 1, 1, 1, 2024, + 1, 1, 1, 1, 2024, 1, 1, + 'MON', 'Q1', 'N', 'N', 'N', + 1, 31, 20230101, 20231001, + 'Y', 'Y', 'Y', 'Y', 'Y' +); + +INSERT INTO web_sales ( + ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk, + ws_bill_customer_sk, ws_bill_cdemo_sk, 
ws_bill_hdemo_sk, ws_bill_addr_sk, + ws_ship_customer_sk, ws_ship_cdemo_sk, ws_ship_hdemo_sk, ws_ship_addr_sk, + ws_web_page_sk, ws_web_site_sk, ws_ship_mode_sk, ws_warehouse_sk, ws_promo_sk, + ws_order_number, ws_quantity, ws_wholesale_cost, ws_list_price, ws_sales_price, + ws_ext_discount_amt, ws_ext_sales_price, ws_ext_wholesale_cost, ws_ext_list_price, + ws_ext_tax, ws_coupon_amt, ws_ext_ship_cost, ws_net_paid, ws_net_paid_inc_tax, + ws_net_paid_inc_ship, ws_net_paid_inc_ship_tax, ws_net_profit +) VALUES ( + 20240101, 43200, 20240103, 2001, + 601, 701, 801, 901, + 602, 702, 802, 902, + 3001, 4001, 5001, 6001, 7001, + 8800001, 3, 15.00, 18.00, 16.50, + 4.50, 49.50, 45.00, 54.00, + 3.47, 0.00, 5.00, 49.50, 52.97, + 54.50, 58.00, 7.97 +); +""" +} + diff --git a/regression-test/suites/shape_check/tpcds_sf1000/load.groovy b/regression-test/suites/shape_check/tpcds_sf1000/load.groovy index 52eee1cd28d20a..e818d54bd7361f 100644 --- a/regression-test/suites/shape_check/tpcds_sf1000/load.groovy +++ b/regression-test/suites/shape_check/tpcds_sf1000/load.groovy @@ -1,2540 +1,2540 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -suite("load") { - String database = context.config.getDbNameByFile(context.file) - sql "drop database if exists ${database}" - sql "create database ${database}" - sql "use ${database}" - - sql ''' - drop table if exists customer_demographics - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS customer_demographics ( - cd_demo_sk int not null, - cd_gender varchar(1), - cd_marital_status varchar(1), - cd_education_status varchar(20), - cd_purchase_estimate integer, - cd_credit_rating varchar(10), - cd_dep_count integer, - cd_dep_employed_count integer, - cd_dep_college_count integer - ) - DUPLICATE KEY(cd_demo_sk) - DISTRIBUTED BY HASH(cd_demo_sk) BUCKETS 9 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists reason - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS reason ( - r_reason_sk int not null, - r_reason_id varchar(16) not null, - r_reason_desc varchar(100) - ) - DUPLICATE KEY(r_reason_sk) - DISTRIBUTED BY HASH(r_reason_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists date_dim - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS date_dim ( - d_date_sk int not null, - d_date_id varchar(16) not null, - d_date datev2, - d_month_seq integer, - d_week_seq integer, - d_quarter_seq integer, - d_year integer, - d_dow integer, - d_moy integer, - d_dom integer, - d_qoy integer, - d_fy_year integer, - d_fy_quarter_seq integer, - d_fy_week_seq integer, - d_day_name varchar(9), - d_quarter_name varchar(6), - d_holiday varchar(1), - d_weekend varchar(1), - d_following_holiday varchar(1), - d_first_dom integer, - d_last_dom integer, - d_same_day_ly integer, - d_same_day_lq integer, - d_current_day varchar(1), - d_current_week varchar(1), - d_current_month varchar(1), - d_current_quarter varchar(1), - d_current_year varchar(1) - ) - DUPLICATE KEY(d_date_sk) - DISTRIBUTED BY HASH(d_date_sk) BUCKETS 9 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists warehouse - ''' - - 
sql ''' - CREATE TABLE IF NOT EXISTS warehouse ( - w_warehouse_sk int not null, - w_warehouse_id varchar(16) not null, - w_warehouse_name varchar(20), - w_warehouse_sq_ft integer, - w_street_number varchar(10), - w_street_name varchar(60), - w_street_type varchar(15), - w_suite_number varchar(10), - w_city varchar(60), - w_county varchar(30), - w_state varchar(2), - w_zip varchar(10), - w_country varchar(20), - w_gmt_offset decimalv3(5,2) - ) - DUPLICATE KEY(w_warehouse_sk) - DISTRIBUTED BY HASH(w_warehouse_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists catalog_sales - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS catalog_sales ( - cs_sold_date_sk int, - cs_item_sk int not null, - cs_order_number int not null, - cs_sold_time_sk int, - cs_ship_date_sk int, - cs_bill_customer_sk int, - cs_bill_cdemo_sk int, - cs_bill_hdemo_sk int, - cs_bill_addr_sk int, - cs_ship_customer_sk int, - cs_ship_cdemo_sk int, - cs_ship_hdemo_sk int, - cs_ship_addr_sk int, - cs_call_center_sk int, - cs_catalog_page_sk int, - cs_ship_mode_sk int, - cs_warehouse_sk int, - cs_promo_sk int, - cs_quantity int, - cs_wholesale_cost decimalv3(7,2), - cs_list_price decimalv3(7,2), - cs_sales_price decimalv3(7,2), - cs_ext_discount_amt decimalv3(7,2), - cs_ext_sales_price decimalv3(7,2), - cs_ext_wholesale_cost decimalv3(7,2), - cs_ext_list_price decimalv3(7,2), - cs_ext_tax decimalv3(7,2), - cs_coupon_amt decimalv3(7,2), - cs_ext_ship_cost decimalv3(7,2), - cs_net_paid decimalv3(7,2), - cs_net_paid_inc_tax decimalv3(7,2), - cs_net_paid_inc_ship decimalv3(7,2), - cs_net_paid_inc_ship_tax decimalv3(7,2), - cs_net_profit decimalv3(7,2) - ) - DUPLICATE KEY(`cs_sold_date_sk`, `cs_item_sk`, `cs_order_number`) - DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 261 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists call_center - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS call_center ( - cc_call_center_sk int not 
null, - cc_call_center_id varchar(16) not null, - cc_rec_start_date date, - cc_rec_end_date date, - cc_closed_date_sk integer, - cc_open_date_sk integer, - cc_name varchar(50), - cc_class varchar(50), - cc_employees integer, - cc_sq_ft integer, - cc_hours varchar(20), - cc_manager varchar(40), - cc_mkt_id integer, - cc_mkt_class varchar(50), - cc_mkt_desc varchar(100), - cc_market_manager varchar(40), - cc_division integer, - cc_division_name varchar(50), - cc_company integer, - cc_company_name varchar(50), - cc_street_number varchar(10), - cc_street_name varchar(60), - cc_street_type varchar(15), - cc_suite_number varchar(10), - cc_city varchar(60), - cc_county varchar(30), - cc_state varchar(2), - cc_zip varchar(10), - cc_country varchar(20), - cc_gmt_offset decimalv3(5,2), - cc_tax_percentage decimalv3(5,2) - ) - DUPLICATE KEY(cc_call_center_sk) - DISTRIBUTED BY HASH(cc_call_center_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists inventory - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS inventory ( - inv_date_sk int not null, - inv_item_sk int not null, - inv_warehouse_sk int, - inv_quantity_on_hand integer - ) - DUPLICATE KEY(inv_date_sk, inv_item_sk, inv_warehouse_sk) - DISTRIBUTED BY HASH(inv_item_sk) BUCKETS 63 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists catalog_returns - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS catalog_returns ( - cr_returned_date_sk int, - cr_item_sk int not null, - cr_order_number int not null, - cr_returned_time_sk int, - cr_refunded_customer_sk int, - cr_refunded_cdemo_sk int, - cr_refunded_hdemo_sk int, - cr_refunded_addr_sk int, - cr_returning_customer_sk int, - cr_returning_cdemo_sk int, - cr_returning_hdemo_sk int, - cr_returning_addr_sk int, - cr_call_center_sk int, - cr_catalog_page_sk int, - cr_ship_mode_sk int, - cr_warehouse_sk int, - cr_reason_sk int, - cr_return_quantity integer, - cr_return_amount decimalv3(7,2), - cr_return_tax 
decimalv3(7,2), - cr_return_amt_inc_tax decimalv3(7,2), - cr_fee decimalv3(7,2), - cr_return_ship_cost decimalv3(7,2), - cr_refunded_cash decimalv3(7,2), - cr_reversed_charge decimalv3(7,2), - cr_store_credit decimalv3(7,2), - cr_net_loss decimalv3(7,2) - ) - DUPLICATE KEY(`cr_returned_date_sk`, `cr_item_sk`, `cr_order_number`) - DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 36 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists household_demographics - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS household_demographics ( - hd_demo_sk int not null, - hd_income_band_sk int, - hd_buy_potential varchar(15), - hd_dep_count integer, - hd_vehicle_count integer - ) - DUPLICATE KEY(hd_demo_sk) - DISTRIBUTED BY HASH(hd_demo_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists customer_address - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS customer_address ( - ca_address_sk int not null, - ca_address_id varchar(16) not null, - ca_street_number varchar(10), - ca_street_name varchar(60), - ca_street_type varchar(15), - ca_suite_number varchar(10), - ca_city varchar(60), - ca_county varchar(30), - ca_state varchar(2), - ca_zip varchar(10), - ca_country varchar(20), - ca_gmt_offset decimalv3(5,2), - ca_location_type varchar(20) - ) - DUPLICATE KEY(ca_address_sk) - DISTRIBUTED BY HASH(ca_address_sk) BUCKETS 18 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists income_band - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS income_band ( - ib_income_band_sk int not null, - ib_lower_bound integer, - ib_upper_bound integer - ) - DUPLICATE KEY(ib_income_band_sk) - DISTRIBUTED BY HASH(ib_income_band_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists catalog_page - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS catalog_page ( - cp_catalog_page_sk int not null, - cp_catalog_page_id varchar(16) not null, - 
cp_start_date_sk integer, - cp_end_date_sk integer, - cp_department varchar(50), - cp_catalog_number integer, - cp_catalog_page_number integer, - cp_description varchar(100), - cp_type varchar(100) - ) - DUPLICATE KEY(cp_catalog_page_sk) - DISTRIBUTED BY HASH(cp_catalog_page_sk) BUCKETS 3 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists item - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS item ( - i_item_sk int not null, - i_item_id varchar(16) not null, - i_rec_start_date datev2, - i_rec_end_date datev2, - i_item_desc varchar(200), - i_current_price decimalv3(7,2), - i_wholesale_cost decimalv3(7,2), - i_brand_id integer, - i_brand varchar(50), - i_class_id integer, - i_class char(50), - i_category_id integer, - i_category varchar(50), - i_manufact_id integer, - i_manufact varchar(50), - i_size varchar(20), - i_formulation varchar(20), - i_color varchar(20), - i_units varchar(10), - i_container varchar(10), - i_manager_id integer, - i_product_name varchar(50) - ) - DUPLICATE KEY(i_item_sk) - DISTRIBUTED BY HASH(i_item_sk) BUCKETS 9 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists web_returns - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS web_returns ( - wr_returned_date_sk int, - wr_item_sk int not null, - wr_order_number int not null, - wr_returned_time_sk int, - wr_refunded_customer_sk int, - wr_refunded_cdemo_sk int, - wr_refunded_hdemo_sk int, - wr_refunded_addr_sk int, - wr_returning_customer_sk int, - wr_returning_cdemo_sk int, - wr_returning_hdemo_sk int, - wr_returning_addr_sk int, - wr_web_page_sk int, - wr_reason_sk int, - wr_return_quantity integer, - wr_return_amt decimalv3(7,2), - wr_return_tax decimalv3(7,2), - wr_return_amt_inc_tax decimalv3(7,2), - wr_fee decimalv3(7,2), - wr_return_ship_cost decimalv3(7,2), - wr_refunded_cash decimalv3(7,2), - wr_reversed_charge decimalv3(7,2), - wr_account_credit decimalv3(7,2), - wr_net_loss decimalv3(7,2) - ) - DUPLICATE 
KEY(`wr_returned_date_sk`, `wr_item_sk`, `wr_order_number`) - DISTRIBUTED BY HASH(`wr_item_sk`, `wr_order_number`) BUCKETS 18 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists web_site - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS web_site ( - web_site_sk int not null, - web_site_id varchar(16) not null, - web_rec_start_date datev2, - web_rec_end_date datev2, - web_name varchar(50), - web_open_date_sk int, - web_close_date_sk int, - web_class varchar(50), - web_manager varchar(40), - web_mkt_id integer, - web_mkt_class varchar(50), - web_mkt_desc varchar(100), - web_market_manager varchar(40), - web_company_id integer, - web_company_name varchar(50), - web_street_number varchar(10), - web_street_name varchar(60), - web_street_type varchar(15), - web_suite_number varchar(10), - web_city varchar(60), - web_county varchar(30), - web_state varchar(2), - web_zip varchar(10), - web_country varchar(20), - web_gmt_offset decimalv3(5,2), - web_tax_percentage decimalv3(5,2) - ) - DUPLICATE KEY(web_site_sk) - DISTRIBUTED BY HASH(web_site_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists promotion - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS promotion ( - p_promo_sk int not null, - p_promo_id varchar(16) not null, - p_start_date_sk int, - p_end_date_sk int, - p_item_sk int, - p_cost decimalv3(15,2), - p_response_targe integer, - p_promo_name varchar(50), - p_channel_dmail varchar(1), - p_channel_email varchar(1), - p_channel_catalog varchar(1), - p_channel_tv varchar(1), - p_channel_radio varchar(1), - p_channel_press varchar(1), - p_channel_event varchar(1), - p_channel_demo varchar(1), - p_channel_details varchar(100), - p_purpose varchar(15), - p_discount_active varchar(1) - ) - DUPLICATE KEY(p_promo_sk) - DISTRIBUTED BY HASH(p_promo_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists web_sales - ''' - - sql ''' - CREATE TABLE IF NOT 
EXISTS web_sales ( - ws_sold_date_sk int, - ws_item_sk int not null, - ws_order_number int not null, - ws_sold_time_sk int, - ws_ship_date_sk int, - ws_bill_customer_sk int, - ws_bill_cdemo_sk int, - ws_bill_hdemo_sk int, - ws_bill_addr_sk int, - ws_ship_customer_sk int, - ws_ship_cdemo_sk int, - ws_ship_hdemo_sk int, - ws_ship_addr_sk int, - ws_web_page_sk int, - ws_web_site_sk int, - ws_ship_mode_sk int, - ws_warehouse_sk int, - ws_promo_sk int, - ws_quantity integer, - ws_wholesale_cost decimalv3(7,2), - ws_list_price decimalv3(7,2), - ws_sales_price decimalv3(7,2), - ws_ext_discount_amt decimalv3(7,2), - ws_ext_sales_price decimalv3(7,2), - ws_ext_wholesale_cost decimalv3(7,2), - ws_ext_list_price decimalv3(7,2), - ws_ext_tax decimalv3(7,2), - ws_coupon_amt decimalv3(7,2), - ws_ext_ship_cost decimalv3(7,2), - ws_net_paid decimalv3(7,2), - ws_net_paid_inc_tax decimalv3(7,2), - ws_net_paid_inc_ship decimalv3(7,2), - ws_net_paid_inc_ship_tax decimalv3(7,2), - ws_net_profit decimalv3(7,2) - ) - DUPLICATE KEY(`ws_sold_date_sk`, `ws_item_sk`, `ws_order_number`) - DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 126 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists store - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS store ( - s_store_sk int not null, - s_store_id varchar(16) not null, - s_rec_start_date datev2, - s_rec_end_date datev2, - s_closed_date_sk int, - s_store_name varchar(50), - s_number_employees integer, - s_floor_space integer, - s_hours varchar(20), - s_manager varchar(40), - s_market_id integer, - s_geography_class varchar(100), - s_market_desc varchar(100), - s_market_manager varchar(40), - s_division_id integer, - s_division_name varchar(50), - s_company_id integer, - s_company_name varchar(50), - s_street_number varchar(10), - s_street_name varchar(60), - s_street_type varchar(15), - s_suite_number varchar(10), - s_city varchar(60), - s_county varchar(30), - s_state varchar(2), - s_zip varchar(10), - 
s_country varchar(20), - s_gmt_offset decimalv3(5,2), - s_tax_percentage decimalv3(5,2) - ) - DUPLICATE KEY(s_store_sk) - DISTRIBUTED BY HASH(s_store_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists time_dim - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS time_dim ( - t_time_sk int not null, - t_time_id varchar(16) not null, - t_time integer, - t_hour integer, - t_minute integer, - t_second integer, - t_am_pm varchar(2), - t_shift varchar(20), - t_sub_shift varchar(20), - t_meal_time varchar(20) - ) - DUPLICATE KEY(t_time_sk) - DISTRIBUTED BY HASH(t_time_sk) BUCKETS 9 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists web_page - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS web_page ( - wp_web_page_sk int not null, - wp_web_page_id varchar(16) not null, - wp_rec_start_date datev2, - wp_rec_end_date datev2, - wp_creation_date_sk int, - wp_access_date_sk int, - wp_autogen_flag varchar(1), - wp_customer_sk int, - wp_url varchar(100), - wp_type varchar(50), - wp_char_count integer, - wp_link_count integer, - wp_image_count integer, - wp_max_ad_count integer - ) - DUPLICATE KEY(wp_web_page_sk) - DISTRIBUTED BY HASH(wp_web_page_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists store_returns - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS store_returns ( - sr_returned_date_sk int, - sr_item_sk int not null, - sr_ticket_number int not null, - sr_return_time_sk int, - sr_customer_sk int, - sr_cdemo_sk int, - sr_hdemo_sk int, - sr_addr_sk int, - sr_store_sk int, - sr_reason_sk int, - sr_return_quantity integer, - sr_return_amt decimalv3(7,2), - sr_return_tax decimalv3(7,2), - sr_return_amt_inc_tax decimalv3(7,2), - sr_fee decimalv3(7,2), - sr_return_ship_cost decimalv3(7,2), - sr_refunded_cash decimalv3(7,2), - sr_reversed_charge decimalv3(7,2), - sr_store_credit decimalv3(7,2), - sr_net_loss decimalv3(7,2) - ) - duplicate 
key(`sr_returned_date_sk`, `sr_item_sk`, `sr_ticket_number`) - distributed by hash (sr_item_sk, sr_ticket_number) buckets 36 - properties ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists store_sales - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS store_sales ( - ss_sold_date_sk int, - ss_item_sk int not null, - ss_ticket_number int not null, - ss_sold_time_sk int, - ss_customer_sk int, - ss_cdemo_sk int, - ss_hdemo_sk int, - ss_addr_sk int, - ss_store_sk int, - ss_promo_sk int, - ss_quantity integer, - ss_wholesale_cost decimalv3(7,2), - ss_list_price decimalv3(7,2), - ss_sales_price decimalv3(7,2), - ss_ext_discount_amt decimalv3(7,2), - ss_ext_sales_price decimalv3(7,2), - ss_ext_wholesale_cost decimalv3(7,2), - ss_ext_list_price decimalv3(7,2), - ss_ext_tax decimalv3(7,2), - ss_coupon_amt decimalv3(7,2), - ss_net_paid decimalv3(7,2), - ss_net_paid_inc_tax decimalv3(7,2), - ss_net_profit decimalv3(7,2) - ) - DUPLICATE KEY(`ss_sold_date_sk`, `ss_item_sk`, `ss_ticket_number`) - DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 261 - PROPERTIES ( - "replication_num" = "1", - "colocate_with" = "store" - ) - ''' - - sql ''' - drop table if exists ship_mode - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS ship_mode ( - sm_ship_mode_sk int not null, - sm_ship_mode_id varchar(16) not null, - sm_type varchar(30), - sm_code varchar(10), - sm_carrier varchar(20), - sm_contract varchar(20) - ) - DUPLICATE KEY(sm_ship_mode_sk) - DISTRIBUTED BY HASH(sm_ship_mode_sk) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists customer - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS customer ( - c_customer_sk int not null, - c_customer_id varchar(16) not null, - c_current_cdemo_sk int, - c_current_hdemo_sk int, - c_current_addr_sk int, - c_first_shipto_date_sk int, - c_first_sales_date_sk int, - c_salutation varchar(10), - c_first_name varchar(20), - c_last_name varchar(30), - c_preferred_cust_flag varchar(1), - 
c_birth_day integer, - c_birth_month integer, - c_birth_year integer, - c_birth_country varchar(20), - c_login varchar(13), - c_email_address varchar(50), - c_last_review_date_sk int - ) - DUPLICATE KEY(c_customer_sk) - DISTRIBUTED BY HASH(c_customer_sk) BUCKETS 18 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - drop table if exists dbgen_version - ''' - - sql ''' - CREATE TABLE IF NOT EXISTS dbgen_version - ( - dv_version varchar(16) , - dv_create_date datev2 , - dv_create_time datetime , - dv_cmdline_args varchar(200) - ) - DUPLICATE KEY(dv_version) - DISTRIBUTED BY HASH(dv_version) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1" - ) - ''' - - sql ''' - alter table customer add constraint customer_pk primary key (c_customer_sk); - ''' - - sql ''' - alter table customer add constraint customer_uk unique (c_customer_id); - ''' - - sql ''' - alter table store_sales add constraint ss_fk foreign key(ss_customer_sk) references customer(c_customer_sk); - ''' - - sql ''' - alter table web_sales add constraint ws_fk foreign key(ws_bill_customer_sk) references customer(c_customer_sk); - ''' - - sql ''' - alter table catalog_sales add constraint cs_fk foreign key(cs_bill_customer_sk) references customer(c_customer_sk); - ''' - - sql """ - alter table customer_demographics modify column cd_dep_employed_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') - """ - - sql """ - alter table date_dim modify column d_day_name set stats ('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='Friday', 'max_value'='Wednesday', 'data_size'='521779') - """ - - sql """ - alter table date_dim modify column d_following_holiday set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') - """ - - sql """ - alter table date_dim modify column d_same_day_ly set stats ('row_count'='73049', 'ndv'='72450', 'num_nulls'='0', 'min_value'='2414657', 
'max_value'='2487705', 'data_size'='292196') - """ - - sql """ - alter table warehouse modify column w_city set stats ('row_count'='20', 'ndv'='12', 'num_nulls'='0', 'min_value'='Fairview', 'max_value'='Shiloh', 'data_size'='183') - """ - - sql """ - alter table warehouse modify column w_street_type set stats ('row_count'='20', 'ndv'='14', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='71') - """ - - sql """ - alter table catalog_sales modify column cs_call_center_sk set stats ('row_count'='1439980416', 'ndv'='42', 'num_nulls'='7199711', 'min_value'='1', 'max_value'='42', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_net_paid_inc_ship set stats ('row_count'='1439980416', 'ndv'='2505826', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='43956.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_sales_price set stats ('row_count'='1439980416', 'ndv'='29306', 'num_nulls'='7200276', 'min_value'='0.00', 'max_value'='300.00', 'data_size'='5759921664') - """ - - sql """ - alter table call_center modify column cc_class set stats ('row_count'='42', 'ndv'='3', 'num_nulls'='0', 'min_value'='large', 'max_value'='small', 'data_size'='226') - """ - - sql """ - alter table call_center modify column cc_country set stats ('row_count'='42', 'ndv'='1', 'num_nulls'='0', 'min_value'='United States', 'max_value'='United States', 'data_size'='546') - """ - - sql """ - alter table call_center modify column cc_county set stats ('row_count'='42', 'ndv'='16', 'num_nulls'='0', 'min_value'='Barrow County', 'max_value'='Williamson County', 'data_size'='627') - """ - - sql """ - alter table call_center modify column cc_mkt_class set stats ('row_count'='42', 'ndv'='36', 'num_nulls'='0', 'min_value'='A bit narrow forms matter animals. 
Consist', 'max_value'='Yesterday new men can make moreov', 'data_size'='1465') - """ - - sql """ - alter table call_center modify column cc_sq_ft set stats ('row_count'='42', 'ndv'='31', 'num_nulls'='0', 'min_value'='-1890660328', 'max_value'='2122480316', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_state set stats ('row_count'='42', 'ndv'='14', 'num_nulls'='0', 'min_value'='FL', 'max_value'='WV', 'data_size'='84') - """ - - sql """ - alter table inventory modify column inv_warehouse_sk set stats ('row_count'='783000000', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='6264000000') - """ - - sql """ - alter table catalog_returns modify column cr_refunded_addr_sk set stats ('row_count'='143996756', 'ndv'='6015811', 'num_nulls'='2881609', 'min_value'='1', 'max_value'='6000000', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_refunded_cash set stats ('row_count'='143996756', 'ndv'='1107525', 'num_nulls'='2879192', 'min_value'='0.00', 'max_value'='26955.24', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_refunded_cdemo_sk set stats ('row_count'='143996756', 'ndv'='1916366', 'num_nulls'='2881314', 'min_value'='1', 'max_value'='1920800', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_return_amt_inc_tax set stats ('row_count'='143996756', 'ndv'='1544502', 'num_nulls'='2881886', 'min_value'='0.00', 'max_value'='30418.06', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_returning_addr_sk set stats ('row_count'='143996756', 'ndv'='6015811', 'num_nulls'='2883215', 'min_value'='1', 'max_value'='6000000', 'data_size'='1151974048') - """ - - sql """ - alter table household_demographics modify column hd_buy_potential set stats ('row_count'='7200', 'ndv'='6', 'num_nulls'='0', 'min_value'='0-500', 'max_value'='Unknown', 'data_size'='54000') - """ - 
- sql """ - alter table customer_address modify column ca_address_id set stats ('row_count'='6000000', 'ndv'='5984931', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAABAA', 'max_value'='AAAAAAAAPPPPPEAA', 'data_size'='96000000') - """ - - sql """ - alter table customer_address modify column ca_address_sk set stats ('row_count'='6000000', 'ndv'='6015811', 'num_nulls'='0', 'min_value'='1', 'max_value'='6000000', 'data_size'='48000000') - """ - - sql """ - alter table customer_address modify column ca_country set stats ('row_count'='6000000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='75661794') - """ - - sql """ - alter table customer_address modify column ca_location_type set stats ('row_count'='6000000', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='single family', 'data_size'='52372545') - """ - - sql """ - alter table customer_address modify column ca_street_number set stats ('row_count'='6000000', 'ndv'='1002', 'num_nulls'='0', 'min_value'='', 'max_value'='999', 'data_size'='16837336') - """ - - sql """ - alter table customer_address modify column ca_suite_number set stats ('row_count'='6000000', 'ndv'='76', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite Y', 'data_size'='45911575') - """ - - sql """ - alter table catalog_page modify column cp_catalog_page_id set stats ('row_count'='30000', 'ndv'='29953', 'num_nulls'='0', 'min_value'='AAAAAAAAAAABAAAA', 'max_value'='AAAAAAAAPPPGAAAA', 'data_size'='480000') - """ - - sql """ - alter table item modify column i_rec_end_date set stats ('row_count'='300000', 'ndv'='3', 'num_nulls'='150000', 'min_value'='1999-10-27', 'max_value'='2001-10-26', 'data_size'='1200000') - """ - - sql """ - alter table web_returns modify column wr_refunded_addr_sk set stats ('row_count'='71997522', 'ndv'='6015811', 'num_nulls'='3239971', 'min_value'='1', 'max_value'='6000000', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_reversed_charge set stats 
('row_count'='71997522', 'ndv'='692680', 'num_nulls'='3239546', 'min_value'='0.00', 'max_value'='23194.77', 'data_size'='287990088') - """ - - sql """ - alter table web_site modify column web_state set stats ('row_count'='54', 'ndv'='18', 'num_nulls'='0', 'min_value'='AL', 'max_value'='WV', 'data_size'='108') - """ - - sql """ - alter table promotion modify column p_end_date_sk set stats ('row_count'='1500', 'ndv'='683', 'num_nulls'='18', 'min_value'='2450113', 'max_value'='2450967', 'data_size'='12000') - """ - - sql """ - alter table web_sales modify column ws_bill_hdemo_sk set stats ('row_count'='720000376', 'ndv'='7251', 'num_nulls'='180139', 'min_value'='1', 'max_value'='7200', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_ext_ship_cost set stats ('row_count'='720000376', 'ndv'='567477', 'num_nulls'='180084', 'min_value'='0.00', 'max_value'='14950.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_ship_addr_sk set stats ('row_count'='720000376', 'ndv'='6015811', 'num_nulls'='179848', 'min_value'='1', 'max_value'='6000000', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_ship_mode_sk set stats ('row_count'='720000376', 'ndv'='20', 'num_nulls'='180017', 'min_value'='1', 'max_value'='20', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_warehouse_sk set stats ('row_count'='720000376', 'ndv'='20', 'num_nulls'='180105', 'min_value'='1', 'max_value'='20', 'data_size'='5760003008') - """ - - sql """ - alter table store modify column s_company_name set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6965') - """ - - sql """ - alter table store modify column s_gmt_offset set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='6', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_manager set stats 
('row_count'='1002', 'ndv'='739', 'num_nulls'='0', 'min_value'='', 'max_value'='Zane Clifton', 'data_size'='12649') - """ - - sql """ - alter table store modify column s_street_number set stats ('row_count'='1002', 'ndv'='521', 'num_nulls'='0', 'min_value'='', 'max_value'='999', 'data_size'='2874') - """ - - sql """ - alter table time_dim modify column t_meal_time set stats ('row_count'='86400', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='lunch', 'data_size'='248400') - """ - - sql """ - alter table time_dim modify column t_time set stats ('row_count'='86400', 'ndv'='86684', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='345600') - """ - - sql """ - alter table web_page modify column wp_creation_date_sk set stats ('row_count'='3000', 'ndv'='199', 'num_nulls'='33', 'min_value'='2450604', 'max_value'='2450815', 'data_size'='24000') - """ - - sql """ - alter table web_page modify column wp_customer_sk set stats ('row_count'='3000', 'ndv'='713', 'num_nulls'='2147', 'min_value'='9522', 'max_value'='11995685', 'data_size'='24000') - """ - - sql """ - alter table web_page modify column wp_max_ad_count set stats ('row_count'='3000', 'ndv'='5', 'num_nulls'='31', 'min_value'='0', 'max_value'='4', 'data_size'='12000') - """ - - sql """ - alter table web_page modify column wp_url set stats ('row_count'='3000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='http://www.foo.com', 'data_size'='53406') - """ - - sql """ - alter table store_returns modify column sr_refunded_cash set stats ('row_count'='287999764', 'ndv'='928470', 'num_nulls'='10081294', 'min_value'='0.00', 'max_value'='18173.96', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_return_tax set stats ('row_count'='287999764', 'ndv'='117247', 'num_nulls'='10081332', 'min_value'='0.00', 'max_value'='1682.04', 'data_size'='1151999056') - """ - - sql """ - alter table store_sales modify column ss_customer_sk set stats 
('row_count'='2879987999', 'ndv'='12157481', 'num_nulls'='129590766', 'min_value'='1', 'max_value'='12000000', 'data_size'='23039903992') - """ - - sql """ - alter table store_sales modify column ss_hdemo_sk set stats ('row_count'='2879987999', 'ndv'='7251', 'num_nulls'='129594559', 'min_value'='1', 'max_value'='7200', 'data_size'='23039903992') - """ - - sql """ - alter table store_sales modify column ss_store_sk set stats ('row_count'='2879987999', 'ndv'='499', 'num_nulls'='129572050', 'min_value'='1', 'max_value'='1000', 'data_size'='23039903992') - """ - - sql """ - alter table ship_mode modify column sm_ship_mode_id set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPAAAAAAA', 'data_size'='320') - """ - - sql """ - alter table ship_mode modify column sm_ship_mode_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') - """ - - sql """ - alter table customer modify column c_first_name set stats ('row_count'='12000000', 'ndv'='5140', 'num_nulls'='0', 'min_value'='', 'max_value'='Zulma', 'data_size'='67593278') - """ - - sql """ - alter table customer modify column c_first_sales_date_sk set stats ('row_count'='12000000', 'ndv'='3644', 'num_nulls'='419856', 'min_value'='2448998', 'max_value'='2452648', 'data_size'='96000000') - """ - - sql """ - alter table customer modify column c_first_shipto_date_sk set stats ('row_count'='12000000', 'ndv'='3644', 'num_nulls'='420769', 'min_value'='2449028', 'max_value'='2452678', 'data_size'='96000000') - """ - - sql """ - alter table customer_demographics modify column cd_dep_college_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') - """ - - sql """ - alter table date_dim modify column d_dow set stats ('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='292196') - """ - - sql """ - alter 
table date_dim modify column d_fy_quarter_seq set stats ('row_count'='73049', 'ndv'='801', 'num_nulls'='0', 'min_value'='1', 'max_value'='801', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_qoy set stats ('row_count'='73049', 'ndv'='4', 'num_nulls'='0', 'min_value'='1', 'max_value'='4', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_quarter_seq set stats ('row_count'='73049', 'ndv'='801', 'num_nulls'='0', 'min_value'='1', 'max_value'='801', 'data_size'='292196') - """ - - sql """ - alter table warehouse modify column w_street_name set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='', 'max_value'='Wilson Elm', 'data_size'='176') - """ - - sql """ - alter table warehouse modify column w_suite_number set stats ('row_count'='20', 'ndv'='18', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite X', 'data_size'='150') - """ - - sql """ - alter table catalog_sales modify column cs_bill_cdemo_sk set stats ('row_count'='1439980416', 'ndv'='1916366', 'num_nulls'='7202134', 'min_value'='1', 'max_value'='1920800', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_bill_hdemo_sk set stats ('row_count'='1439980416', 'ndv'='7251', 'num_nulls'='7198837', 'min_value'='1', 'max_value'='7200', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_ext_ship_cost set stats ('row_count'='1439980416', 'ndv'='573238', 'num_nulls'='7202537', 'min_value'='0.00', 'max_value'='14994.00', 'data_size'='5759921664') - """ - - sql """ - alter table call_center modify column cc_name set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='California', 'max_value'='Pacific Northwest_2', 'data_size'='572') - """ - - sql """ - alter table call_center modify column cc_street_name set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='1st', 'max_value'='Willow', 'data_size'='356') - """ - - sql """ - alter table 
call_center modify column cc_zip set stats ('row_count'='42', 'ndv'='19', 'num_nulls'='0', 'min_value'='18605', 'max_value'='98048', 'data_size'='210') - """ - - sql """ - alter table inventory modify column inv_quantity_on_hand set stats ('row_count'='783000000', 'ndv'='1006', 'num_nulls'='39153758', 'min_value'='0', 'max_value'='1000', 'data_size'='3132000000') - """ - - sql """ - alter table catalog_returns modify column cr_catalog_page_sk set stats ('row_count'='143996756', 'ndv'='17005', 'num_nulls'='2882502', 'min_value'='1', 'max_value'='25207', 'data_size'='1151974048') - """ - - sql """ - alter table household_demographics modify column hd_income_band_sk set stats ('row_count'='7200', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='57600') - """ - - sql """ - alter table catalog_page modify column cp_description set stats ('row_count'='30000', 'ndv'='30141', 'num_nulls'='0', 'min_value'='', 'max_value'='Youngsters worry both workers. Fascinating characters take cheap never alive studies. 
Direct, old', 'data_size'='2215634') - """ - - sql """ - alter table item modify column i_item_id set stats ('row_count'='300000', 'ndv'='150851', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAABAAA', 'max_value'='AAAAAAAAPPPPBAAA', 'data_size'='4800000') - """ - - sql """ - alter table web_returns modify column wr_account_credit set stats ('row_count'='71997522', 'ndv'='683955', 'num_nulls'='3241972', 'min_value'='0.00', 'max_value'='23166.33', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_net_loss set stats ('row_count'='71997522', 'ndv'='815608', 'num_nulls'='3240573', 'min_value'='0.50', 'max_value'='15887.84', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_return_amt set stats ('row_count'='71997522', 'ndv'='808311', 'num_nulls'='3238405', 'min_value'='0.00', 'max_value'='29191.00', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_return_amt_inc_tax set stats ('row_count'='71997522', 'ndv'='1359913', 'num_nulls'='3239765', 'min_value'='0.00', 'max_value'='30393.01', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_return_quantity set stats ('row_count'='71997522', 'ndv'='100', 'num_nulls'='3238643', 'min_value'='1', 'max_value'='100', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_returning_addr_sk set stats ('row_count'='71997522', 'ndv'='6015811', 'num_nulls'='3239658', 'min_value'='1', 'max_value'='6000000', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_returning_customer_sk set stats ('row_count'='71997522', 'ndv'='12119220', 'num_nulls'='3237281', 'min_value'='1', 'max_value'='12000000', 'data_size'='575980176') - """ - - sql """ - alter table web_site modify column web_mkt_desc set stats ('row_count'='54', 'ndv'='38', 'num_nulls'='0', 'min_value'='Acres see else children. Mutual too', 'max_value'='Windows increase to a differences. 
Other parties might in', 'data_size'='3473') - """ - - sql """ - alter table web_site modify column web_mkt_id set stats ('row_count'='54', 'ndv'='6', 'num_nulls'='1', 'min_value'='1', 'max_value'='6', 'data_size'='216') - """ - - sql """ - alter table web_site modify column web_rec_end_date set stats ('row_count'='54', 'ndv'='3', 'num_nulls'='27', 'min_value'='1999-08-16', 'max_value'='2001-08-15', 'data_size'='216') - """ - - sql """ - alter table web_site modify column web_site_id set stats ('row_count'='54', 'ndv'='27', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPBAAAAAA', 'data_size'='864') - """ - - sql """ - alter table web_site modify column web_street_type set stats ('row_count'='54', 'ndv'='20', 'num_nulls'='0', 'min_value'='Ave', 'max_value'='Wy', 'data_size'='208') - """ - - sql """ - alter table promotion modify column p_channel_demo set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1479') - """ - - sql """ - alter table promotion modify column p_channel_details set stats ('row_count'='1500', 'ndv'='1490', 'num_nulls'='0', 'min_value'='', 'max_value'='Young, valuable companies watch walls. 
Payments can flour', 'data_size'='59126') - """ - - sql """ - alter table promotion modify column p_channel_event set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1482') - """ - - sql """ - alter table promotion modify column p_discount_active set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1473') - """ - - sql """ - alter table promotion modify column p_promo_sk set stats ('row_count'='1500', 'ndv'='1489', 'num_nulls'='0', 'min_value'='1', 'max_value'='1500', 'data_size'='12000') - """ - - sql """ - alter table promotion modify column p_purpose set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='10374') - """ - - sql """ - alter table web_sales modify column ws_bill_cdemo_sk set stats ('row_count'='720000376', 'ndv'='1916366', 'num_nulls'='179788', 'min_value'='1', 'max_value'='1920800', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_sold_date_sk set stats ('row_count'='720000376', 'ndv'='1820', 'num_nulls'='179921', 'min_value'='2450816', 'max_value'='2452642', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_web_site_sk set stats ('row_count'='720000376', 'ndv'='54', 'num_nulls'='179930', 'min_value'='1', 'max_value'='54', 'data_size'='5760003008') - """ - - sql """ - alter table store modify column s_city set stats ('row_count'='1002', 'ndv'='55', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodlawn', 'data_size'='9238') - """ - - sql """ - alter table store modify column s_company_id set stats ('row_count'='1002', 'ndv'='1', 'num_nulls'='7', 'min_value'='1', 'max_value'='1', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_county set stats ('row_count'='1002', 'ndv'='28', 'num_nulls'='0', 'min_value'='', 'max_value'='Ziebach County', 'data_size'='14291') - """ - - sql """ - alter table store 
modify column s_geography_class set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6972') - """ - - sql """ - alter table store modify column s_hours set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='8AM-8AM', 'data_size'='7088') - """ - - sql """ - alter table store modify column s_store_id set stats ('row_count'='1002', 'ndv'='501', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPBAAAAA', 'data_size'='16032') - """ - - sql """ - alter table store modify column s_zip set stats ('row_count'='1002', 'ndv'='354', 'num_nulls'='0', 'min_value'='', 'max_value'='99454', 'data_size'='4975') - """ - - sql """ - alter table time_dim modify column t_am_pm set stats ('row_count'='86400', 'ndv'='2', 'num_nulls'='0', 'min_value'='AM', 'max_value'='PM', 'data_size'='172800') - """ - - sql """ - alter table time_dim modify column t_minute set stats ('row_count'='86400', 'ndv'='60', 'num_nulls'='0', 'min_value'='0', 'max_value'='59', 'data_size'='345600') - """ - - sql """ - alter table web_page modify column wp_web_page_id set stats ('row_count'='3000', 'ndv'='1501', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPKAAAAA', 'data_size'='48000') - """ - - sql """ - alter table web_page modify column wp_web_page_sk set stats ('row_count'='3000', 'ndv'='2984', 'num_nulls'='0', 'min_value'='1', 'max_value'='3000', 'data_size'='24000') - """ - - sql """ - alter table store_returns modify column sr_return_amt set stats ('row_count'='287999764', 'ndv'='671228', 'num_nulls'='10080055', 'min_value'='0.00', 'max_value'='19434.00', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_returned_date_sk set stats ('row_count'='287999764', 'ndv'='2010', 'num_nulls'='10079607', 'min_value'='2450820', 'max_value'='2452822', 'data_size'='2303998112') - """ - - sql """ - alter table store_sales modify column ss_ext_tax 
set stats ('row_count'='2879987999', 'ndv'='149597', 'num_nulls'='129588732', 'min_value'='0.00', 'max_value'='1797.48', 'data_size'='11519951996') - """ - - sql """ - alter table customer modify column c_current_cdemo_sk set stats ('row_count'='12000000', 'ndv'='1913901', 'num_nulls'='419895', 'min_value'='1', 'max_value'='1920800', 'data_size'='96000000') - """ - - sql """ - alter table customer modify column c_customer_id set stats ('row_count'='12000000', 'ndv'='11921032', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAABAA', 'max_value'='AAAAAAAAPPPPPKAA', 'data_size'='192000000') - """ - - sql """ - alter table date_dim modify column d_current_day set stats ('row_count'='73049', 'ndv'='1', 'num_nulls'='0', 'min_value'='N', 'max_value'='N', 'data_size'='73049') - """ - - sql """ - alter table date_dim modify column d_current_month set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') - """ - - sql """ - alter table date_dim modify column d_date set stats ('row_count'='73049', 'ndv'='73250', 'num_nulls'='0', 'min_value'='1900-01-02', 'max_value'='2100-01-01', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_moy set stats ('row_count'='73049', 'ndv'='12', 'num_nulls'='0', 'min_value'='1', 'max_value'='12', 'data_size'='292196') - """ - - sql """ - alter table warehouse modify column w_gmt_offset set stats ('row_count'='20', 'ndv'='3', 'num_nulls'='1', 'min_value'='-7.00', 'max_value'='-5.00', 'data_size'='80') - """ - - sql """ - alter table warehouse modify column w_warehouse_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') - """ - - sql """ - alter table warehouse modify column w_warehouse_sq_ft set stats ('row_count'='20', 'ndv'='19', 'num_nulls'='1', 'min_value'='73065', 'max_value'='977787', 'data_size'='80') - """ - - sql """ - alter table catalog_sales modify column cs_ext_sales_price set stats 
('row_count'='1439980416', 'ndv'='1100662', 'num_nulls'='7199625', 'min_value'='0.00', 'max_value'='29943.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_ext_wholesale_cost set stats ('row_count'='1439980416', 'ndv'='393180', 'num_nulls'='7199876', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_item_sk set stats ('row_count'='1439980416', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_net_paid_inc_tax set stats ('row_count'='1439980416', 'ndv'='2422238', 'num_nulls'='7200702', 'min_value'='0.00', 'max_value'='32376.27', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_ship_date_sk set stats ('row_count'='1439980416', 'ndv'='1933', 'num_nulls'='7200707', 'min_value'='2450817', 'max_value'='2452744', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_warehouse_sk set stats ('row_count'='1439980416', 'ndv'='20', 'num_nulls'='7200688', 'min_value'='1', 'max_value'='20', 'data_size'='11519843328') - """ - - sql """ - alter table call_center modify column cc_division set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_division_name set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='able', 'max_value'='pri', 'data_size'='164') - """ - - sql """ - alter table call_center modify column cc_manager set stats ('row_count'='42', 'ndv'='28', 'num_nulls'='0', 'min_value'='Alden Snyder', 'max_value'='Wayne Ray', 'data_size'='519') - """ - - sql """ - alter table call_center modify column cc_rec_start_date set stats ('row_count'='42', 'ndv'='4', 'num_nulls'='0', 'min_value'='1998-01-01', 'max_value'='2002-01-01', 
'data_size'='168') - """ - - sql """ - alter table catalog_returns modify column cr_call_center_sk set stats ('row_count'='143996756', 'ndv'='42', 'num_nulls'='2881668', 'min_value'='1', 'max_value'='42', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_net_loss set stats ('row_count'='143996756', 'ndv'='911034', 'num_nulls'='2881704', 'min_value'='0.50', 'max_value'='16095.08', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_refunded_customer_sk set stats ('row_count'='143996756', 'ndv'='12156363', 'num_nulls'='2879017', 'min_value'='1', 'max_value'='12000000', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_refunded_hdemo_sk set stats ('row_count'='143996756', 'ndv'='7251', 'num_nulls'='2882107', 'min_value'='1', 'max_value'='7200', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_returning_customer_sk set stats ('row_count'='143996756', 'ndv'='12157481', 'num_nulls'='2879023', 'min_value'='1', 'max_value'='12000000', 'data_size'='1151974048') - """ - - sql """ - alter table customer_address modify column ca_gmt_offset set stats ('row_count'='6000000', 'ndv'='6', 'num_nulls'='180219', 'min_value'='-10.00', 'max_value'='-5.00', 'data_size'='24000000') - """ - - sql """ - alter table item modify column i_color set stats ('row_count'='300000', 'ndv'='93', 'num_nulls'='0', 'min_value'='', 'max_value'='yellow', 'data_size'='1610293') - """ - - sql """ - alter table item modify column i_manufact set stats ('row_count'='300000', 'ndv'='1004', 'num_nulls'='0', 'min_value'='', 'max_value'='pripripri', 'data_size'='3379693') - """ - - sql """ - alter table item modify column i_product_name set stats ('row_count'='300000', 'ndv'='294994', 'num_nulls'='0', 'min_value'='', 'max_value'='pripripripripriought', 'data_size'='6849199') - """ - - sql """ - alter table web_returns modify column wr_returned_time_sk 
set stats ('row_count'='71997522', 'ndv'='87677', 'num_nulls'='3238574', 'min_value'='0', 'max_value'='86399', 'data_size'='575980176') - """ - - sql """ - alter table web_site modify column web_manager set stats ('row_count'='54', 'ndv'='40', 'num_nulls'='0', 'min_value'='', 'max_value'='William Young', 'data_size'='658') - """ - - sql """ - alter table web_site modify column web_mkt_class set stats ('row_count'='54', 'ndv'='40', 'num_nulls'='0', 'min_value'='', 'max_value'='Written, political plans show to the models. T', 'data_size'='1822') - """ - - sql """ - alter table web_site modify column web_rec_start_date set stats ('row_count'='54', 'ndv'='4', 'num_nulls'='2', 'min_value'='1997-08-16', 'max_value'='2001-08-16', 'data_size'='216') - """ - - sql """ - alter table web_site modify column web_street_number set stats ('row_count'='54', 'ndv'='36', 'num_nulls'='0', 'min_value'='', 'max_value'='983', 'data_size'='154') - """ - - sql """ - alter table promotion modify column p_channel_catalog set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1482') - """ - - sql """ - alter table promotion modify column p_promo_id set stats ('row_count'='1500', 'ndv'='1519', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPEAAAAA', 'data_size'='24000') - """ - - sql """ - alter table web_sales modify column ws_bill_customer_sk set stats ('row_count'='720000376', 'ndv'='12103729', 'num_nulls'='179817', 'min_value'='1', 'max_value'='12000000', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_list_price set stats ('row_count'='720000376', 'ndv'='29396', 'num_nulls'='180053', 'min_value'='1.00', 'max_value'='300.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_sales_price set stats ('row_count'='720000376', 'ndv'='29288', 'num_nulls'='180005', 'min_value'='0.00', 'max_value'='300.00', 'data_size'='2880001504') - """ - - sql """ - 
alter table web_sales modify column ws_ship_hdemo_sk set stats ('row_count'='720000376', 'ndv'='7251', 'num_nulls'='179824', 'min_value'='1', 'max_value'='7200', 'data_size'='5760003008') - """ - - sql """ - alter table store modify column s_closed_date_sk set stats ('row_count'='1002', 'ndv'='163', 'num_nulls'='729', 'min_value'='2450820', 'max_value'='2451313', 'data_size'='8016') - """ - - sql """ - alter table store modify column s_division_id set stats ('row_count'='1002', 'ndv'='1', 'num_nulls'='6', 'min_value'='1', 'max_value'='1', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_market_desc set stats ('row_count'='1002', 'ndv'='765', 'num_nulls'='0', 'min_value'='', 'max_value'='Yesterday left factors handle continuing co', 'data_size'='57638') - """ - - sql """ - alter table store modify column s_market_id set stats ('row_count'='1002', 'ndv'='10', 'num_nulls'='8', 'min_value'='1', 'max_value'='10', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_state set stats ('row_count'='1002', 'ndv'='22', 'num_nulls'='0', 'min_value'='', 'max_value'='WV', 'data_size'='1994') - """ - - sql """ - alter table store modify column s_store_sk set stats ('row_count'='1002', 'ndv'='988', 'num_nulls'='0', 'min_value'='1', 'max_value'='1002', 'data_size'='8016') - """ - - sql """ - alter table store modify column s_street_name set stats ('row_count'='1002', 'ndv'='549', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodland Oak', 'data_size'='8580') - """ - - sql """ - alter table web_page modify column wp_access_date_sk set stats ('row_count'='3000', 'ndv'='101', 'num_nulls'='31', 'min_value'='2452548', 'max_value'='2452648', 'data_size'='24000') - """ - - sql """ - alter table web_page modify column wp_char_count set stats ('row_count'='3000', 'ndv'='1883', 'num_nulls'='42', 'min_value'='303', 'max_value'='8523', 'data_size'='12000') - """ - - sql """ - alter table store_returns modify column sr_addr_sk set stats 
('row_count'='287999764', 'ndv'='6015811', 'num_nulls'='10082311', 'min_value'='1', 'max_value'='6000000', 'data_size'='2303998112') - """ - - sql """ - alter table store_returns modify column sr_return_time_sk set stats ('row_count'='287999764', 'ndv'='32660', 'num_nulls'='10082805', 'min_value'='28799', 'max_value'='61199', 'data_size'='2303998112') - """ - - sql """ - alter table store_returns modify column sr_store_sk set stats ('row_count'='287999764', 'ndv'='499', 'num_nulls'='10081871', 'min_value'='1', 'max_value'='1000', 'data_size'='2303998112') - """ - - sql """ - alter table store_sales modify column ss_coupon_amt set stats ('row_count'='2879987999', 'ndv'='1161208', 'num_nulls'='129609101', 'min_value'='0.00', 'max_value'='19778.00', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_sales_price set stats ('row_count'='2879987999', 'ndv'='19780', 'num_nulls'='129598061', 'min_value'='0.00', 'max_value'='200.00', 'data_size'='11519951996') - """ - - sql """ - alter table customer modify column c_birth_country set stats ('row_count'='12000000', 'ndv'='211', 'num_nulls'='0', 'min_value'='', 'max_value'='ZIMBABWE', 'data_size'='100750845') - """ - - sql """ - alter table customer modify column c_birth_month set stats ('row_count'='12000000', 'ndv'='12', 'num_nulls'='419629', 'min_value'='1', 'max_value'='12', 'data_size'='48000000') - """ - - sql """ - alter table customer modify column c_customer_sk set stats ('row_count'='12000000', 'ndv'='12157481', 'num_nulls'='0', 'min_value'='1', 'max_value'='12000000', 'data_size'='96000000') - """ - - sql """ - alter table customer modify column c_email_address set stats ('row_count'='12000000', 'ndv'='11642077', 'num_nulls'='0', 'min_value'='', 'max_value'='Zulma.Young@aDhzZzCzYN.edu', 'data_size'='318077849') - """ - - sql """ - alter table customer modify column c_last_review_date_sk set stats ('row_count'='12000000', 'ndv'='366', 'num_nulls'='419900', 'min_value'='2452283', 
'max_value'='2452648', 'data_size'='96000000') - """ - - sql """ - alter table customer modify column c_preferred_cust_flag set stats ('row_count'='12000000', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='11580510') - """ - - sql """ - alter table dbgen_version modify column dv_version set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='3.2.0', 'max_value'='3.2.0', 'data_size'='5') - """ - - sql """ - alter table customer_demographics modify column cd_purchase_estimate set stats ('row_count'='1920800', 'ndv'='20', 'num_nulls'='0', 'min_value'='500', 'max_value'='10000', 'data_size'='7683200') - """ - - sql """ - alter table reason modify column r_reason_id set stats ('row_count'='65', 'ndv'='65', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPDAAAAAA', 'data_size'='1040') - """ - - sql """ - alter table reason modify column r_reason_sk set stats ('row_count'='65', 'ndv'='65', 'num_nulls'='0', 'min_value'='1', 'max_value'='65', 'data_size'='520') - """ - - sql """ - alter table date_dim modify column d_current_week set stats ('row_count'='73049', 'ndv'='1', 'num_nulls'='0', 'min_value'='N', 'max_value'='N', 'data_size'='73049') - """ - - sql """ - alter table date_dim modify column d_first_dom set stats ('row_count'='73049', 'ndv'='2410', 'num_nulls'='0', 'min_value'='2415021', 'max_value'='2488070', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_fy_year set stats ('row_count'='73049', 'ndv'='202', 'num_nulls'='0', 'min_value'='1900', 'max_value'='2100', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_last_dom set stats ('row_count'='73049', 'ndv'='2419', 'num_nulls'='0', 'min_value'='2415020', 'max_value'='2488372', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_month_seq set stats ('row_count'='73049', 'ndv'='2398', 'num_nulls'='0', 'min_value'='0', 'max_value'='2400', 'data_size'='292196') - """ - - 
sql """ - alter table date_dim modify column d_quarter_name set stats ('row_count'='73049', 'ndv'='799', 'num_nulls'='0', 'min_value'='1900Q1', 'max_value'='2100Q1', 'data_size'='438294') - """ - - sql """ - alter table warehouse modify column w_county set stats ('row_count'='20', 'ndv'='14', 'num_nulls'='0', 'min_value'='Bronx County', 'max_value'='Ziebach County', 'data_size'='291') - """ - - sql """ - alter table warehouse modify column w_street_number set stats ('row_count'='20', 'ndv'='19', 'num_nulls'='0', 'min_value'='', 'max_value'='957', 'data_size'='54') - """ - - sql """ - alter table warehouse modify column w_warehouse_name set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='', 'max_value'='Therefore urg', 'data_size'='307') - """ - - sql """ - alter table catalog_sales modify column cs_ext_discount_amt set stats ('row_count'='1439980416', 'ndv'='1100115', 'num_nulls'='7201054', 'min_value'='0.00', 'max_value'='29982.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_net_paid_inc_ship_tax set stats ('row_count'='1439980416', 'ndv'='3312360', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='46593.36', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_promo_sk set stats ('row_count'='1439980416', 'ndv'='1489', 'num_nulls'='7202844', 'min_value'='1', 'max_value'='1500', 'data_size'='11519843328') - """ - - sql """ - alter table call_center modify column cc_call_center_id set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPBAAAAAA', 'data_size'='672') - """ - - sql """ - alter table call_center modify column cc_employees set stats ('row_count'='42', 'ndv'='30', 'num_nulls'='0', 'min_value'='69020', 'max_value'='6879074', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_suite_number set stats ('row_count'='42', 'ndv'='18', 'num_nulls'='0', 'min_value'='Suite 0', 
'max_value'='Suite W', 'data_size'='326') - """ - - sql """ - alter table catalog_returns modify column cr_item_sk set stats ('row_count'='143996756', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_reason_sk set stats ('row_count'='143996756', 'ndv'='65', 'num_nulls'='2881950', 'min_value'='1', 'max_value'='65', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_return_ship_cost set stats ('row_count'='143996756', 'ndv'='483467', 'num_nulls'='2883436', 'min_value'='0.00', 'max_value'='14273.28', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_ship_mode_sk set stats ('row_count'='143996756', 'ndv'='20', 'num_nulls'='2879879', 'min_value'='1', 'max_value'='20', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_store_credit set stats ('row_count'='143996756', 'ndv'='802237', 'num_nulls'='2880469', 'min_value'='0.00', 'max_value'='23215.15', 'data_size'='575987024') - """ - - sql """ - alter table customer_address modify column ca_city set stats ('row_count'='6000000', 'ndv'='977', 'num_nulls'='0', 'min_value'='', 'max_value'='Zion', 'data_size'='52096290') - """ - - sql """ - alter table customer_address modify column ca_state set stats ('row_count'='6000000', 'ndv'='52', 'num_nulls'='0', 'min_value'='', 'max_value'='WY', 'data_size'='11640128') - """ - - sql """ - alter table customer_address modify column ca_street_name set stats ('row_count'='6000000', 'ndv'='8173', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodland Woodland', 'data_size'='50697257') - """ - - sql """ - alter table customer_address modify column ca_street_type set stats ('row_count'='6000000', 'ndv'='21', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='24441630') - """ - - sql """ - alter table catalog_page modify column cp_catalog_number set 
stats ('row_count'='30000', 'ndv'='109', 'num_nulls'='297', 'min_value'='1', 'max_value'='109', 'data_size'='120000') - """ - - sql """ - alter table catalog_page modify column cp_catalog_page_number set stats ('row_count'='30000', 'ndv'='279', 'num_nulls'='294', 'min_value'='1', 'max_value'='277', 'data_size'='120000') - """ - - sql """ - alter table catalog_page modify column cp_catalog_page_sk set stats ('row_count'='30000', 'ndv'='30439', 'num_nulls'='0', 'min_value'='1', 'max_value'='30000', 'data_size'='240000') - """ - - sql """ - alter table catalog_page modify column cp_start_date_sk set stats ('row_count'='30000', 'ndv'='91', 'num_nulls'='286', 'min_value'='2450815', 'max_value'='2453005', 'data_size'='120000') - """ - - sql """ - alter table item modify column i_rec_start_date set stats ('row_count'='300000', 'ndv'='4', 'num_nulls'='784', 'min_value'='1997-10-27', 'max_value'='2001-10-27', 'data_size'='1200000') - """ - - sql """ - alter table item modify column i_units set stats ('row_count'='300000', 'ndv'='22', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='1253652') - """ - - sql """ - alter table web_returns modify column wr_refunded_hdemo_sk set stats ('row_count'='71997522', 'ndv'='7251', 'num_nulls'='3238545', 'min_value'='1', 'max_value'='7200', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_return_ship_cost set stats ('row_count'='71997522', 'ndv'='451263', 'num_nulls'='3239048', 'min_value'='0.00', 'max_value'='14352.10', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_returned_date_sk set stats ('row_count'='71997522', 'ndv'='2188', 'num_nulls'='3239259', 'min_value'='2450819', 'max_value'='2453002', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_returning_cdemo_sk set stats ('row_count'='71997522', 'ndv'='1916366', 'num_nulls'='3239192', 'min_value'='1', 'max_value'='1920800', 'data_size'='575980176') - 
""" - - sql """ - alter table web_site modify column web_suite_number set stats ('row_count'='54', 'ndv'='38', 'num_nulls'='0', 'min_value'='Suite 100', 'max_value'='Suite Y', 'data_size'='430') - """ - - sql """ - alter table promotion modify column p_start_date_sk set stats ('row_count'='1500', 'ndv'='685', 'num_nulls'='23', 'min_value'='2450096', 'max_value'='2450915', 'data_size'='12000') - """ - - sql """ - alter table web_sales modify column ws_coupon_amt set stats ('row_count'='720000376', 'ndv'='1505315', 'num_nulls'='179933', 'min_value'='0.00', 'max_value'='28824.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_ext_wholesale_cost set stats ('row_count'='720000376', 'ndv'='393180', 'num_nulls'='180060', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_net_paid_inc_ship set stats ('row_count'='720000376', 'ndv'='2414838', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='44263.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_ship_date_sk set stats ('row_count'='720000376', 'ndv'='1952', 'num_nulls'='180011', 'min_value'='2450817', 'max_value'='2452762', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_web_page_sk set stats ('row_count'='720000376', 'ndv'='2984', 'num_nulls'='179732', 'min_value'='1', 'max_value'='3000', 'data_size'='5760003008') - """ - - sql """ - alter table store modify column s_country set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='12961') - """ - - sql """ - alter table store modify column s_store_name set stats ('row_count'='1002', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='3916') - """ - - sql """ - alter table time_dim modify column t_second set stats ('row_count'='86400', 'ndv'='60', 'num_nulls'='0', 'min_value'='0', 'max_value'='59', 
'data_size'='345600') - """ - - sql """ - alter table time_dim modify column t_sub_shift set stats ('row_count'='86400', 'ndv'='4', 'num_nulls'='0', 'min_value'='afternoon', 'max_value'='night', 'data_size'='597600') - """ - - sql """ - alter table web_page modify column wp_image_count set stats ('row_count'='3000', 'ndv'='7', 'num_nulls'='26', 'min_value'='1', 'max_value'='7', 'data_size'='12000') - """ - - sql """ - alter table web_page modify column wp_type set stats ('row_count'='3000', 'ndv'='8', 'num_nulls'='0', 'min_value'='', 'max_value'='welcome', 'data_size'='18867') - """ - - sql """ - alter table store_returns modify column sr_customer_sk set stats ('row_count'='287999764', 'ndv'='12157481', 'num_nulls'='10081624', 'min_value'='1', 'max_value'='12000000', 'data_size'='2303998112') - """ - - sql """ - alter table store_returns modify column sr_hdemo_sk set stats ('row_count'='287999764', 'ndv'='7251', 'num_nulls'='10083275', 'min_value'='1', 'max_value'='7200', 'data_size'='2303998112') - """ - - sql """ - alter table store_sales modify column ss_addr_sk set stats ('row_count'='2879987999', 'ndv'='6015811', 'num_nulls'='129589799', 'min_value'='1', 'max_value'='6000000', 'data_size'='23039903992') - """ - - sql """ - alter table store_sales modify column ss_item_sk set stats ('row_count'='2879987999', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='23039903992') - """ - - sql """ - alter table store_sales modify column ss_quantity set stats ('row_count'='2879987999', 'ndv'='100', 'num_nulls'='129584258', 'min_value'='1', 'max_value'='100', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_ticket_number set stats ('row_count'='2879987999', 'ndv'='238830448', 'num_nulls'='0', 'min_value'='1', 'max_value'='240000000', 'data_size'='23039903992') - """ - - sql """ - alter table store_sales modify column ss_wholesale_cost set stats ('row_count'='2879987999', 'ndv'='9905', 
'num_nulls'='129590273', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='11519951996') - """ - - sql """ - alter table ship_mode modify column sm_type set stats ('row_count'='20', 'ndv'='6', 'num_nulls'='0', 'min_value'='EXPRESS', 'max_value'='TWO DAY', 'data_size'='150') - """ - - sql """ - alter table customer modify column c_current_addr_sk set stats ('row_count'='12000000', 'ndv'='5243359', 'num_nulls'='0', 'min_value'='3', 'max_value'='6000000', 'data_size'='96000000') - """ - - sql """ - alter table customer modify column c_last_name set stats ('row_count'='12000000', 'ndv'='4990', 'num_nulls'='0', 'min_value'='', 'max_value'='Zuniga', 'data_size'='70991730') - """ - - sql """ - alter table dbgen_version modify column dv_cmdline_args set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='-SCALE 1000 -PARALLEL 64 -CHILD 1 -TERMINATE N -DIR /mnt/datadisk0/tpcds1t/tpcds-data', 'max_value'='-SCALE 1000 -PARALLEL 64 -CHILD 1 -TERMINATE N -DIR /mnt/datadisk0/tpcds1t/tpcds-data', 'data_size'='86') - """ - - sql """ - alter table date_dim modify column d_current_quarter set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') - """ - - sql """ - alter table date_dim modify column d_date_sk set stats ('row_count'='73049', 'ndv'='73042', 'num_nulls'='0', 'min_value'='2415022', 'max_value'='2488070', 'data_size'='584392') - """ - - sql """ - alter table date_dim modify column d_holiday set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') - """ - - sql """ - alter table warehouse modify column w_country set stats ('row_count'='20', 'ndv'='1', 'num_nulls'='0', 'min_value'='United States', 'max_value'='United States', 'data_size'='260') - """ - - sql """ - alter table warehouse modify column w_state set stats ('row_count'='20', 'ndv'='13', 'num_nulls'='0', 'min_value'='AL', 'max_value'='TN', 'data_size'='40') - """ - - sql """ - alter 
table catalog_sales modify column cs_bill_addr_sk set stats ('row_count'='1439980416', 'ndv'='6015811', 'num_nulls'='7199539', 'min_value'='1', 'max_value'='6000000', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_bill_customer_sk set stats ('row_count'='1439980416', 'ndv'='12157481', 'num_nulls'='7201919', 'min_value'='1', 'max_value'='12000000', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_net_paid set stats ('row_count'='1439980416', 'ndv'='1809875', 'num_nulls'='7197668', 'min_value'='0.00', 'max_value'='29943.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_ship_addr_sk set stats ('row_count'='1439980416', 'ndv'='6015811', 'num_nulls'='7198232', 'min_value'='1', 'max_value'='6000000', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_ship_mode_sk set stats ('row_count'='1439980416', 'ndv'='20', 'num_nulls'='7201083', 'min_value'='1', 'max_value'='20', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_sold_date_sk set stats ('row_count'='1439980416', 'ndv'='1835', 'num_nulls'='7203326', 'min_value'='2450815', 'max_value'='2452654', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_sold_time_sk set stats ('row_count'='1439980416', 'ndv'='87677', 'num_nulls'='7201329', 'min_value'='0', 'max_value'='86399', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_wholesale_cost set stats ('row_count'='1439980416', 'ndv'='9905', 'num_nulls'='7201098', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='5759921664') - """ - - sql """ - alter table call_center modify column cc_company_name set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='able', 'max_value'='pri', 'data_size'='160') - """ - - sql """ - alter table call_center modify column cc_market_manager set 
stats ('row_count'='42', 'ndv'='35', 'num_nulls'='0', 'min_value'='Cesar Allen', 'max_value'='William Larsen', 'data_size'='524') - """ - - sql """ - alter table call_center modify column cc_mkt_id set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_street_type set stats ('row_count'='42', 'ndv'='11', 'num_nulls'='0', 'min_value'='Avenue', 'max_value'='Way', 'data_size'='184') - """ - - sql """ - alter table catalog_returns modify column cr_return_tax set stats ('row_count'='143996756', 'ndv'='149828', 'num_nulls'='2881611', 'min_value'='0.00', 'max_value'='2511.58', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_returning_cdemo_sk set stats ('row_count'='143996756', 'ndv'='1916366', 'num_nulls'='2880543', 'min_value'='1', 'max_value'='1920800', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_returning_hdemo_sk set stats ('row_count'='143996756', 'ndv'='7251', 'num_nulls'='2882692', 'min_value'='1', 'max_value'='7200', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_reversed_charge set stats ('row_count'='143996756', 'ndv'='802509', 'num_nulls'='2881215', 'min_value'='0.00', 'max_value'='24033.84', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_warehouse_sk set stats ('row_count'='143996756', 'ndv'='20', 'num_nulls'='2882192', 'min_value'='1', 'max_value'='20', 'data_size'='1151974048') - """ - - sql """ - alter table household_demographics modify column hd_demo_sk set stats ('row_count'='7200', 'ndv'='7251', 'num_nulls'='0', 'min_value'='1', 'max_value'='7200', 'data_size'='57600') - """ - - sql """ - alter table household_demographics modify column hd_vehicle_count set stats ('row_count'='7200', 'ndv'='6', 'num_nulls'='0', 'min_value'='-1', 'max_value'='4', 'data_size'='28800') 
- """ - - sql """ - alter table customer_address modify column ca_zip set stats ('row_count'='6000000', 'ndv'='9253', 'num_nulls'='0', 'min_value'='', 'max_value'='99981', 'data_size'='29097610') - """ - - sql """ - alter table income_band modify column ib_income_band_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') - """ - - sql """ - alter table catalog_page modify column cp_type set stats ('row_count'='30000', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='quarterly', 'data_size'='227890') - """ - - sql """ - alter table item modify column i_brand set stats ('row_count'='300000', 'ndv'='714', 'num_nulls'='0', 'min_value'='', 'max_value'='univunivamalg #9', 'data_size'='4834917') - """ - - sql """ - alter table item modify column i_formulation set stats ('row_count'='300000', 'ndv'='224757', 'num_nulls'='0', 'min_value'='', 'max_value'='yellow98911509228741', 'data_size'='5984460') - """ - - sql """ - alter table item modify column i_item_desc set stats ('row_count'='300000', 'ndv'='217721', 'num_nulls'='0', 'min_value'='', 'max_value'='Youngsters used to save quite colour', 'data_size'='30093342') - """ - - sql """ - alter table web_returns modify column wr_fee set stats ('row_count'='71997522', 'ndv'='9958', 'num_nulls'='3238926', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_item_sk set stats ('row_count'='71997522', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_reason_sk set stats ('row_count'='71997522', 'ndv'='65', 'num_nulls'='3238897', 'min_value'='1', 'max_value'='65', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_refunded_customer_sk set stats ('row_count'='71997522', 'ndv'='12117831', 'num_nulls'='3242433', 'min_value'='1', 'max_value'='12000000', 
'data_size'='575980176') - """ - - sql """ - alter table web_site modify column web_city set stats ('row_count'='54', 'ndv'='31', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodlawn', 'data_size'='491') - """ - - sql """ - alter table web_site modify column web_close_date_sk set stats ('row_count'='54', 'ndv'='18', 'num_nulls'='10', 'min_value'='2441265', 'max_value'='2446218', 'data_size'='432') - """ - - sql """ - alter table web_site modify column web_company_id set stats ('row_count'='54', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='216') - """ - - sql """ - alter table web_site modify column web_company_name set stats ('row_count'='54', 'ndv'='7', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='203') - """ - - sql """ - alter table web_site modify column web_county set stats ('row_count'='54', 'ndv'='25', 'num_nulls'='0', 'min_value'='', 'max_value'='Williamson County', 'data_size'='762') - """ - - sql """ - alter table web_site modify column web_name set stats ('row_count'='54', 'ndv'='10', 'num_nulls'='0', 'min_value'='', 'max_value'='site_8', 'data_size'='312') - """ - - sql """ - alter table web_site modify column web_open_date_sk set stats ('row_count'='54', 'ndv'='27', 'num_nulls'='1', 'min_value'='2450373', 'max_value'='2450807', 'data_size'='432') - """ - - sql """ - alter table promotion modify column p_channel_dmail set stats ('row_count'='1500', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='1483') - """ - - sql """ - alter table promotion modify column p_channel_press set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1481') - """ - - sql """ - alter table promotion modify column p_channel_radio set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1479') - """ - - sql """ - alter table promotion modify column p_cost set stats ('row_count'='1500', 'ndv'='1', 
'num_nulls'='18', 'min_value'='1000.00', 'max_value'='1000.00', 'data_size'='12000') - """ - - sql """ - alter table web_sales modify column ws_ext_tax set stats ('row_count'='720000376', 'ndv'='211413', 'num_nulls'='179695', 'min_value'='0.00', 'max_value'='2682.90', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_item_sk set stats ('row_count'='720000376', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_net_paid set stats ('row_count'='720000376', 'ndv'='1749360', 'num_nulls'='179970', 'min_value'='0.00', 'max_value'='29810.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_net_paid_inc_ship_tax set stats ('row_count'='720000376', 'ndv'='3224829', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='46004.19', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_net_paid_inc_tax set stats ('row_count'='720000376', 'ndv'='2354996', 'num_nulls'='179972', 'min_value'='0.00', 'max_value'='32492.90', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_order_number set stats ('row_count'='720000376', 'ndv'='60401176', 'num_nulls'='0', 'min_value'='1', 'max_value'='60000000', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_quantity set stats ('row_count'='720000376', 'ndv'='100', 'num_nulls'='179781', 'min_value'='1', 'max_value'='100', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_ship_cdemo_sk set stats ('row_count'='720000376', 'ndv'='1916366', 'num_nulls'='180290', 'min_value'='1', 'max_value'='1920800', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_sold_time_sk set stats ('row_count'='720000376', 'ndv'='87677', 'num_nulls'='179980', 'min_value'='0', 'max_value'='86399', 'data_size'='5760003008') - """ - - sql 
""" - alter table store modify column s_street_type set stats ('row_count'='1002', 'ndv'='21', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='4189') - """ - - sql """ - alter table web_page modify column wp_autogen_flag set stats ('row_count'='3000', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='2962') - """ - - sql """ - alter table web_page modify column wp_rec_start_date set stats ('row_count'='3000', 'ndv'='4', 'num_nulls'='29', 'min_value'='1997-09-03', 'max_value'='2001-09-03', 'data_size'='12000') - """ - - sql """ - alter table store_returns modify column sr_net_loss set stats ('row_count'='287999764', 'ndv'='714210', 'num_nulls'='10080716', 'min_value'='0.50', 'max_value'='10776.08', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_return_amt_inc_tax set stats ('row_count'='287999764', 'ndv'='1259368', 'num_nulls'='10076879', 'min_value'='0.00', 'max_value'='20454.63', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_return_quantity set stats ('row_count'='287999764', 'ndv'='100', 'num_nulls'='10082815', 'min_value'='1', 'max_value'='100', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_return_ship_cost set stats ('row_count'='287999764', 'ndv'='355844', 'num_nulls'='10081927', 'min_value'='0.00', 'max_value'='9767.34', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_reversed_charge set stats ('row_count'='287999764', 'ndv'='700618', 'num_nulls'='10085976', 'min_value'='0.00', 'max_value'='17339.42', 'data_size'='1151999056') - """ - - sql """ - alter table store_sales modify column ss_net_paid_inc_tax set stats ('row_count'='2879987999', 'ndv'='1681767', 'num_nulls'='129609050', 'min_value'='0.00', 'max_value'='21769.48', 'data_size'='11519951996') - """ - - sql """ - alter table customer modify column c_birth_day set stats ('row_count'='12000000', 
'ndv'='31', 'num_nulls'='420361', 'min_value'='1', 'max_value'='31', 'data_size'='48000000') - """ - - sql """ - alter table customer_demographics modify column cd_credit_rating set stats ('row_count'='1920800', 'ndv'='4', 'num_nulls'='0', 'min_value'='Good', 'max_value'='Unknown', 'data_size'='13445600') - """ - - sql """ - alter table customer_demographics modify column cd_demo_sk set stats ('row_count'='1920800', 'ndv'='1916366', 'num_nulls'='0', 'min_value'='1', 'max_value'='1920800', 'data_size'='15366400') - """ - - sql """ - alter table customer_demographics modify column cd_dep_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') - """ - - sql """ - alter table customer_demographics modify column cd_education_status set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='2 yr Degree', 'max_value'='Unknown', 'data_size'='18384800') - """ - - sql """ - alter table customer_demographics modify column cd_gender set stats ('row_count'='1920800', 'ndv'='2', 'num_nulls'='0', 'min_value'='F', 'max_value'='M', 'data_size'='1920800') - """ - - sql """ - alter table customer_demographics modify column cd_marital_status set stats ('row_count'='1920800', 'ndv'='5', 'num_nulls'='0', 'min_value'='D', 'max_value'='W', 'data_size'='1920800') - """ - - sql """ - alter table date_dim modify column d_date_id set stats ('row_count'='73049', 'ndv'='72907', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAFCAA', 'max_value'='AAAAAAAAPPPPECAA', 'data_size'='1168784') - """ - - sql """ - alter table date_dim modify column d_fy_week_seq set stats ('row_count'='73049', 'ndv'='10448', 'num_nulls'='0', 'min_value'='1', 'max_value'='10436', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_year set stats ('row_count'='73049', 'ndv'='202', 'num_nulls'='0', 'min_value'='1900', 'max_value'='2100', 'data_size'='292196') - """ - - sql """ - alter table warehouse modify column 
w_warehouse_id set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPAAAAAAA', 'data_size'='320') - """ - - sql """ - alter table catalog_sales modify column cs_ext_list_price set stats ('row_count'='1439980416', 'ndv'='1160303', 'num_nulls'='7199542', 'min_value'='1.00', 'max_value'='30000.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_ext_tax set stats ('row_count'='1439980416', 'ndv'='215267', 'num_nulls'='7200412', 'min_value'='0.00', 'max_value'='2673.27', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_quantity set stats ('row_count'='1439980416', 'ndv'='100', 'num_nulls'='7202885', 'min_value'='1', 'max_value'='100', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_ship_cdemo_sk set stats ('row_count'='1439980416', 'ndv'='1916366', 'num_nulls'='7200151', 'min_value'='1', 'max_value'='1920800', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_ship_customer_sk set stats ('row_count'='1439980416', 'ndv'='12157481', 'num_nulls'='7201507', 'min_value'='1', 'max_value'='12000000', 'data_size'='11519843328') - """ - - sql """ - alter table call_center modify column cc_company set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_mkt_desc set stats ('row_count'='42', 'ndv'='33', 'num_nulls'='0', 'min_value'='Arms increase controversial, present so', 'max_value'='Young tests could buy comfortable, local users; o', 'data_size'='2419') - """ - - sql """ - alter table call_center modify column cc_open_date_sk set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='2450794', 'max_value'='2451146', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_rec_end_date set stats ('row_count'='42', 
'ndv'='3', 'num_nulls'='21', 'min_value'='2000-01-01', 'max_value'='2001-12-31', 'data_size'='168') - """ - - sql """ - alter table catalog_returns modify column cr_order_number set stats ('row_count'='143996756', 'ndv'='93476424', 'num_nulls'='0', 'min_value'='2', 'max_value'='160000000', 'data_size'='1151974048') - """ - - sql """ - alter table catalog_returns modify column cr_return_amount set stats ('row_count'='143996756', 'ndv'='882831', 'num_nulls'='2880424', 'min_value'='0.00', 'max_value'='28805.04', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_returned_date_sk set stats ('row_count'='143996756', 'ndv'='2108', 'num_nulls'='0', 'min_value'='2450821', 'max_value'='2452924', 'data_size'='1151974048') - """ - - sql """ - alter table income_band modify column ib_upper_bound set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='10000', 'max_value'='200000', 'data_size'='80') - """ - - sql """ - alter table catalog_page modify column cp_department set stats ('row_count'='30000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='DEPARTMENT', 'data_size'='297110') - """ - - sql """ - alter table catalog_page modify column cp_end_date_sk set stats ('row_count'='30000', 'ndv'='97', 'num_nulls'='302', 'min_value'='2450844', 'max_value'='2453186', 'data_size'='120000') - """ - - sql """ - alter table item modify column i_brand_id set stats ('row_count'='300000', 'ndv'='951', 'num_nulls'='763', 'min_value'='1001001', 'max_value'='10016017', 'data_size'='1200000') - """ - - sql """ - alter table item modify column i_category set stats ('row_count'='300000', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='Women', 'data_size'='1766742') - """ - - sql """ - alter table item modify column i_class_id set stats ('row_count'='300000', 'ndv'='16', 'num_nulls'='722', 'min_value'='1', 'max_value'='16', 'data_size'='1200000') - """ - - sql """ - alter table item modify column i_item_sk set stats 
('row_count'='300000', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='2400000') - """ - - sql """ - alter table item modify column i_manufact_id set stats ('row_count'='300000', 'ndv'='1005', 'num_nulls'='761', 'min_value'='1', 'max_value'='1000', 'data_size'='1200000') - """ - - sql """ - alter table item modify column i_wholesale_cost set stats ('row_count'='300000', 'ndv'='7243', 'num_nulls'='740', 'min_value'='0.02', 'max_value'='89.49', 'data_size'='1200000') - """ - - sql """ - alter table web_returns modify column wr_refunded_cdemo_sk set stats ('row_count'='71997522', 'ndv'='1916366', 'num_nulls'='3240352', 'min_value'='1', 'max_value'='1920800', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_return_tax set stats ('row_count'='71997522', 'ndv'='137392', 'num_nulls'='3237729', 'min_value'='0.00', 'max_value'='2551.16', 'data_size'='287990088') - """ - - sql """ - alter table web_returns modify column wr_returning_hdemo_sk set stats ('row_count'='71997522', 'ndv'='7251', 'num_nulls'='3238239', 'min_value'='1', 'max_value'='7200', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_web_page_sk set stats ('row_count'='71997522', 'ndv'='2984', 'num_nulls'='3240387', 'min_value'='1', 'max_value'='3000', 'data_size'='575980176') - """ - - sql """ - alter table web_site modify column web_class set stats ('row_count'='54', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='371') - """ - - sql """ - alter table web_site modify column web_zip set stats ('row_count'='54', 'ndv'='32', 'num_nulls'='0', 'min_value'='14593', 'max_value'='99431', 'data_size'='270') - """ - - sql """ - alter table promotion modify column p_channel_email set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1480') - """ - - sql """ - alter table promotion modify column p_item_sk set stats 
('row_count'='1500', 'ndv'='1467', 'num_nulls'='19', 'min_value'='184', 'max_value'='299990', 'data_size'='12000') - """ - - sql """ - alter table promotion modify column p_promo_name set stats ('row_count'='1500', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='5896') - """ - - sql """ - alter table web_sales modify column ws_ext_discount_amt set stats ('row_count'='720000376', 'ndv'='1093513', 'num_nulls'='179851', 'min_value'='0.00', 'max_value'='29982.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_ext_list_price set stats ('row_count'='720000376', 'ndv'='1160303', 'num_nulls'='179866', 'min_value'='1.00', 'max_value'='30000.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_wholesale_cost set stats ('row_count'='720000376', 'ndv'='9905', 'num_nulls'='179834', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='2880001504') - """ - - sql """ - alter table store modify column s_market_manager set stats ('row_count'='1002', 'ndv'='732', 'num_nulls'='0', 'min_value'='', 'max_value'='Zane Perez', 'data_size'='12823') - """ - - sql """ - alter table store modify column s_number_employees set stats ('row_count'='1002', 'ndv'='101', 'num_nulls'='8', 'min_value'='200', 'max_value'='300', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_rec_end_date set stats ('row_count'='1002', 'ndv'='3', 'num_nulls'='501', 'min_value'='1999-03-13', 'max_value'='2001-03-12', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_rec_start_date set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='7', 'min_value'='1997-03-13', 'max_value'='2001-03-13', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_suite_number set stats ('row_count'='1002', 'ndv'='76', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite Y', 'data_size'='7866') - """ - - sql """ - alter table time_dim modify column t_hour set stats 
('row_count'='86400', 'ndv'='24', 'num_nulls'='0', 'min_value'='0', 'max_value'='23', 'data_size'='345600') - """ - - sql """ - alter table time_dim modify column t_shift set stats ('row_count'='86400', 'ndv'='3', 'num_nulls'='0', 'min_value'='first', 'max_value'='third', 'data_size'='460800') - """ - - sql """ - alter table web_page modify column wp_link_count set stats ('row_count'='3000', 'ndv'='24', 'num_nulls'='27', 'min_value'='2', 'max_value'='25', 'data_size'='12000') - """ - - sql """ - alter table web_page modify column wp_rec_end_date set stats ('row_count'='3000', 'ndv'='3', 'num_nulls'='1500', 'min_value'='1999-09-03', 'max_value'='2001-09-02', 'data_size'='12000') - """ - - sql """ - alter table store_returns modify column sr_cdemo_sk set stats ('row_count'='287999764', 'ndv'='1916366', 'num_nulls'='10076902', 'min_value'='1', 'max_value'='1920800', 'data_size'='2303998112') - """ - - sql """ - alter table store_returns modify column sr_item_sk set stats ('row_count'='287999764', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='2303998112') - """ - - sql """ - alter table store_sales modify column ss_cdemo_sk set stats ('row_count'='2879987999', 'ndv'='1916366', 'num_nulls'='129602155', 'min_value'='1', 'max_value'='1920800', 'data_size'='23039903992') - """ - - sql """ - alter table store_sales modify column ss_ext_discount_amt set stats ('row_count'='2879987999', 'ndv'='1161208', 'num_nulls'='129609101', 'min_value'='0.00', 'max_value'='19778.00', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_ext_wholesale_cost set stats ('row_count'='2879987999', 'ndv'='393180', 'num_nulls'='129595018', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_list_price set stats ('row_count'='2879987999', 'ndv'='19640', 'num_nulls'='129597020', 'min_value'='1.00', 'max_value'='200.00', 'data_size'='11519951996') - 
""" - - sql """ - alter table store_sales modify column ss_net_paid set stats ('row_count'='2879987999', 'ndv'='1288646', 'num_nulls'='129599407', 'min_value'='0.00', 'max_value'='19972.00', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_sold_date_sk set stats ('row_count'='2879987999', 'ndv'='1820', 'num_nulls'='129600843', 'min_value'='2450816', 'max_value'='2452642', 'data_size'='23039903992') - """ - - sql """ - alter table store_sales modify column ss_sold_time_sk set stats ('row_count'='2879987999', 'ndv'='47252', 'num_nulls'='129593012', 'min_value'='28800', 'max_value'='75599', 'data_size'='23039903992') - """ - - sql """ - alter table ship_mode modify column sm_carrier set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AIRBORNE', 'max_value'='ZOUROS', 'data_size'='133') - """ - - sql """ - alter table customer modify column c_birth_year set stats ('row_count'='12000000', 'ndv'='69', 'num_nulls'='419584', 'min_value'='1924', 'max_value'='1992', 'data_size'='48000000') - """ - - sql """ - alter table customer modify column c_login set stats ('row_count'='12000000', 'ndv'='1', 'num_nulls'='0', 'min_value'='', 'max_value'='', 'data_size'='0') - """ - - sql """ - alter table customer modify column c_salutation set stats ('row_count'='12000000', 'ndv'='7', 'num_nulls'='0', 'min_value'='', 'max_value'='Sir', 'data_size'='37544445') - """ - - sql """ - alter table reason modify column r_reason_desc set stats ('row_count'='65', 'ndv'='64', 'num_nulls'='0', 'min_value'='Did not fit', 'max_value'='unauthoized purchase', 'data_size'='848') - """ - - sql """ - alter table date_dim modify column d_current_year set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') - """ - - sql """ - alter table date_dim modify column d_dom set stats ('row_count'='73049', 'ndv'='31', 'num_nulls'='0', 'min_value'='1', 'max_value'='31', 'data_size'='292196') - """ - - 
sql """ - alter table date_dim modify column d_same_day_lq set stats ('row_count'='73049', 'ndv'='72231', 'num_nulls'='0', 'min_value'='2414930', 'max_value'='2487978', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_week_seq set stats ('row_count'='73049', 'ndv'='10448', 'num_nulls'='0', 'min_value'='1', 'max_value'='10436', 'data_size'='292196') - """ - - sql """ - alter table date_dim modify column d_weekend set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') - """ - - sql """ - alter table warehouse modify column w_zip set stats ('row_count'='20', 'ndv'='18', 'num_nulls'='0', 'min_value'='19231', 'max_value'='89275', 'data_size'='100') - """ - - sql """ - alter table catalog_sales modify column cs_catalog_page_sk set stats ('row_count'='1439980416', 'ndv'='17005', 'num_nulls'='7199032', 'min_value'='1', 'max_value'='25207', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_coupon_amt set stats ('row_count'='1439980416', 'ndv'='1578778', 'num_nulls'='7198116', 'min_value'='0.00', 'max_value'='28730.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_list_price set stats ('row_count'='1439980416', 'ndv'='29396', 'num_nulls'='7201549', 'min_value'='1.00', 'max_value'='300.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_net_profit set stats ('row_count'='1439980416', 'ndv'='2058398', 'num_nulls'='0', 'min_value'='-10000.00', 'max_value'='19962.00', 'data_size'='5759921664') - """ - - sql """ - alter table catalog_sales modify column cs_order_number set stats ('row_count'='1439980416', 'ndv'='159051824', 'num_nulls'='0', 'min_value'='1', 'max_value'='160000000', 'data_size'='11519843328') - """ - - sql """ - alter table catalog_sales modify column cs_ship_hdemo_sk set stats ('row_count'='1439980416', 'ndv'='7251', 'num_nulls'='7201542', 
'min_value'='1', 'max_value'='7200', 'data_size'='11519843328') - """ - - sql """ - alter table call_center modify column cc_call_center_sk set stats ('row_count'='42', 'ndv'='42', 'num_nulls'='0', 'min_value'='1', 'max_value'='42', 'data_size'='336') - """ - - sql """ - alter table call_center modify column cc_city set stats ('row_count'='42', 'ndv'='17', 'num_nulls'='0', 'min_value'='Antioch', 'max_value'='Spring Hill', 'data_size'='386') - """ - - sql """ - alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_gmt_offset set stats ('row_count'='42', 'ndv'='4', 'num_nulls'='0', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='168') - """ - - sql """ - alter table call_center modify column cc_hours set stats ('row_count'='42', 'ndv'='3', 'num_nulls'='0', 'min_value'='8AM-12AM', 'max_value'='8AM-8AM', 'data_size'='300') - """ - - sql """ - alter table call_center modify column cc_street_number set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='38', 'max_value'='999', 'data_size'='120') - """ - - sql """ - alter table call_center modify column cc_tax_percentage set stats ('row_count'='42', 'ndv'='12', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='0.12', 'data_size'='168') - """ - - sql """ - alter table inventory modify column inv_date_sk set stats ('row_count'='783000000', 'ndv'='261', 'num_nulls'='0', 'min_value'='2450815', 'max_value'='2452635', 'data_size'='6264000000') - """ - - sql """ - alter table inventory modify column inv_item_sk set stats ('row_count'='783000000', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='6264000000') - """ - - sql """ - alter table catalog_returns modify column cr_fee set stats ('row_count'='143996756', 'ndv'='9958', 'num_nulls'='2882168', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='575987024') - """ - - sql """ - 
alter table catalog_returns modify column cr_return_quantity set stats ('row_count'='143996756', 'ndv'='100', 'num_nulls'='2878774', 'min_value'='1', 'max_value'='100', 'data_size'='575987024') - """ - - sql """ - alter table catalog_returns modify column cr_returned_time_sk set stats ('row_count'='143996756', 'ndv'='87677', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='1151974048') - """ - - sql """ - alter table household_demographics modify column hd_dep_count set stats ('row_count'='7200', 'ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='9', 'data_size'='28800') - """ - - sql """ - alter table customer_address modify column ca_county set stats ('row_count'='6000000', 'ndv'='1825', 'num_nulls'='0', 'min_value'='', 'max_value'='Ziebach County', 'data_size'='81254984') - """ - - sql """ - alter table income_band modify column ib_lower_bound set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='0', 'max_value'='190001', 'data_size'='80') - """ - - sql """ - alter table item modify column i_category_id set stats ('row_count'='300000', 'ndv'='10', 'num_nulls'='766', 'min_value'='1', 'max_value'='10', 'data_size'='1200000') - """ - - sql """ - alter table item modify column i_class set stats ('row_count'='300000', 'ndv'='100', 'num_nulls'='0', 'min_value'='', 'max_value'='womens watch', 'data_size'='2331199') - """ - - sql """ - alter table item modify column i_container set stats ('row_count'='300000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='2094652') - """ - - sql """ - alter table item modify column i_current_price set stats ('row_count'='300000', 'ndv'='9685', 'num_nulls'='775', 'min_value'='0.09', 'max_value'='99.99', 'data_size'='1200000') - """ - - sql """ - alter table item modify column i_manager_id set stats ('row_count'='300000', 'ndv'='100', 'num_nulls'='744', 'min_value'='1', 'max_value'='100', 'data_size'='1200000') - """ - - sql """ - alter table item modify column 
i_size set stats ('row_count'='300000', 'ndv'='8', 'num_nulls'='0', 'min_value'='', 'max_value'='small', 'data_size'='1296134') - """ - - sql """ - alter table web_returns modify column wr_order_number set stats ('row_count'='71997522', 'ndv'='42383708', 'num_nulls'='0', 'min_value'='1', 'max_value'='60000000', 'data_size'='575980176') - """ - - sql """ - alter table web_returns modify column wr_refunded_cash set stats ('row_count'='71997522', 'ndv'='955369', 'num_nulls'='3240493', 'min_value'='0.00', 'max_value'='26992.92', 'data_size'='287990088') - """ - - sql """ - alter table web_site modify column web_country set stats ('row_count'='54', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='689') - """ - - sql """ - alter table web_site modify column web_gmt_offset set stats ('row_count'='54', 'ndv'='4', 'num_nulls'='1', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='216') - """ - - sql """ - alter table web_site modify column web_market_manager set stats ('row_count'='54', 'ndv'='46', 'num_nulls'='0', 'min_value'='', 'max_value'='Zachery Oneil', 'data_size'='691') - """ - - sql """ - alter table web_site modify column web_site_sk set stats ('row_count'='54', 'ndv'='54', 'num_nulls'='0', 'min_value'='1', 'max_value'='54', 'data_size'='432') - """ - - sql """ - alter table web_site modify column web_street_name set stats ('row_count'='54', 'ndv'='53', 'num_nulls'='0', 'min_value'='', 'max_value'='Wilson Ridge', 'data_size'='471') - """ - - sql """ - alter table web_site modify column web_tax_percentage set stats ('row_count'='54', 'ndv'='13', 'num_nulls'='1', 'min_value'='0.00', 'max_value'='0.12', 'data_size'='216') - """ - - sql """ - alter table promotion modify column p_channel_tv set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1481') - """ - - sql """ - alter table promotion modify column p_response_targe set stats ('row_count'='1500', 'ndv'='1', 
'num_nulls'='27', 'min_value'='1', 'max_value'='1', 'data_size'='6000') - """ - - sql """ - alter table web_sales modify column ws_bill_addr_sk set stats ('row_count'='720000376', 'ndv'='6015742', 'num_nulls'='179648', 'min_value'='1', 'max_value'='6000000', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_ext_sales_price set stats ('row_count'='720000376', 'ndv'='1091003', 'num_nulls'='180023', 'min_value'='0.00', 'max_value'='29810.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_net_profit set stats ('row_count'='720000376', 'ndv'='2014057', 'num_nulls'='0', 'min_value'='-10000.00', 'max_value'='19840.00', 'data_size'='2880001504') - """ - - sql """ - alter table web_sales modify column ws_promo_sk set stats ('row_count'='720000376', 'ndv'='1489', 'num_nulls'='180016', 'min_value'='1', 'max_value'='1500', 'data_size'='5760003008') - """ - - sql """ - alter table web_sales modify column ws_ship_customer_sk set stats ('row_count'='720000376', 'ndv'='12074547', 'num_nulls'='179966', 'min_value'='1', 'max_value'='12000000', 'data_size'='5760003008') - """ - - sql """ - alter table store modify column s_division_name set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6965') - """ - - sql """ - alter table store modify column s_floor_space set stats ('row_count'='1002', 'ndv'='752', 'num_nulls'='6', 'min_value'='5002549', 'max_value'='9997773', 'data_size'='4008') - """ - - sql """ - alter table store modify column s_tax_percentage set stats ('row_count'='1002', 'ndv'='12', 'num_nulls'='8', 'min_value'='0.00', 'max_value'='0.11', 'data_size'='4008') - """ - - sql """ - alter table time_dim modify column t_time_id set stats ('row_count'='86400', 'ndv'='85663', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAABAAA', 'max_value'='AAAAAAAAPPPPAAAA', 'data_size'='1382400') - """ - - sql """ - alter table time_dim modify column t_time_sk set 
stats ('row_count'='86400', 'ndv'='87677', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='691200') - """ - - sql """ - alter table store_returns modify column sr_fee set stats ('row_count'='287999764', 'ndv'='9958', 'num_nulls'='10081860', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_reason_sk set stats ('row_count'='287999764', 'ndv'='65', 'num_nulls'='10087936', 'min_value'='1', 'max_value'='65', 'data_size'='2303998112') - """ - - sql """ - alter table store_returns modify column sr_store_credit set stats ('row_count'='287999764', 'ndv'='698161', 'num_nulls'='10077188', 'min_value'='0.00', 'max_value'='17792.48', 'data_size'='1151999056') - """ - - sql """ - alter table store_returns modify column sr_ticket_number set stats ('row_count'='287999764', 'ndv'='168770768', 'num_nulls'='0', 'min_value'='1', 'max_value'='240000000', 'data_size'='2303998112') - """ - - sql """ - alter table store_sales modify column ss_ext_list_price set stats ('row_count'='2879987999', 'ndv'='770971', 'num_nulls'='129593800', 'min_value'='1.00', 'max_value'='20000.00', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_ext_sales_price set stats ('row_count'='2879987999', 'ndv'='754248', 'num_nulls'='129589177', 'min_value'='0.00', 'max_value'='19972.00', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_net_profit set stats ('row_count'='2879987999', 'ndv'='1497362', 'num_nulls'='129572933', 'min_value'='-10000.00', 'max_value'='9986.00', 'data_size'='11519951996') - """ - - sql """ - alter table store_sales modify column ss_promo_sk set stats ('row_count'='2879987999', 'ndv'='1489', 'num_nulls'='129597096', 'min_value'='1', 'max_value'='1500', 'data_size'='23039903992') - """ - - sql """ - alter table ship_mode modify column sm_code set stats ('row_count'='20', 'ndv'='4', 'num_nulls'='0', 
'min_value'='AIR', 'max_value'='SURFACE', 'data_size'='87') - """ - - sql """ - alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 'data_size'='252') - """ - - sql """ - alter table customer modify column c_current_hdemo_sk set stats ('row_count'='12000000', 'ndv'='7251', 'num_nulls'='418736', 'min_value'='1', 'max_value'='7200', 'data_size'='96000000') - """ - - sql """ - alter table dbgen_version modify column dv_create_date set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='2023-07-06', 'max_value'='2023-07-06', 'data_size'='4') - """ - - sql """ - alter table dbgen_version modify column dv_create_time set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='2017-05-13 00:00:00', 'max_value'='2017-05-13 00:00:00', 'data_size'='8') - """ -} +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("load") { + String database = context.config.getDbNameByFile(context.file) + sql "drop database if exists ${database}" + sql "create database ${database}" + sql "use ${database}" + + sql ''' + drop table if exists customer_demographics + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS customer_demographics ( + cd_demo_sk int not null, + cd_gender varchar(1), + cd_marital_status varchar(1), + cd_education_status varchar(20), + cd_purchase_estimate integer, + cd_credit_rating varchar(10), + cd_dep_count integer, + cd_dep_employed_count integer, + cd_dep_college_count integer + ) + DUPLICATE KEY(cd_demo_sk) + DISTRIBUTED BY HASH(cd_demo_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists reason + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS reason ( + r_reason_sk int not null, + r_reason_id varchar(16) not null, + r_reason_desc varchar(100) + ) + DUPLICATE KEY(r_reason_sk) + DISTRIBUTED BY HASH(r_reason_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists date_dim + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS date_dim ( + d_date_sk int not null, + d_date_id varchar(16) not null, + d_date datev2, + d_month_seq integer, + d_week_seq integer, + d_quarter_seq integer, + d_year integer, + d_dow integer, + d_moy integer, + d_dom integer, + d_qoy integer, + d_fy_year integer, + d_fy_quarter_seq integer, + d_fy_week_seq integer, + d_day_name varchar(9), + d_quarter_name varchar(6), + d_holiday varchar(1), + d_weekend varchar(1), + d_following_holiday varchar(1), + d_first_dom integer, + d_last_dom integer, + d_same_day_ly integer, + d_same_day_lq integer, + d_current_day varchar(1), + d_current_week varchar(1), + d_current_month varchar(1), + d_current_quarter varchar(1), + d_current_year varchar(1) + ) + DUPLICATE KEY(d_date_sk) + DISTRIBUTED BY HASH(d_date_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists warehouse + ''' + + 
sql ''' + CREATE TABLE IF NOT EXISTS warehouse ( + w_warehouse_sk int not null, + w_warehouse_id varchar(16) not null, + w_warehouse_name varchar(20), + w_warehouse_sq_ft integer, + w_street_number varchar(10), + w_street_name varchar(60), + w_street_type varchar(15), + w_suite_number varchar(10), + w_city varchar(60), + w_county varchar(30), + w_state varchar(2), + w_zip varchar(10), + w_country varchar(20), + w_gmt_offset decimalv3(5,2) + ) + DUPLICATE KEY(w_warehouse_sk) + DISTRIBUTED BY HASH(w_warehouse_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists catalog_sales + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS catalog_sales ( + cs_sold_date_sk int, + cs_item_sk int not null, + cs_order_number int not null, + cs_sold_time_sk int, + cs_ship_date_sk int, + cs_bill_customer_sk int, + cs_bill_cdemo_sk int, + cs_bill_hdemo_sk int, + cs_bill_addr_sk int, + cs_ship_customer_sk int, + cs_ship_cdemo_sk int, + cs_ship_hdemo_sk int, + cs_ship_addr_sk int, + cs_call_center_sk int, + cs_catalog_page_sk int, + cs_ship_mode_sk int, + cs_warehouse_sk int, + cs_promo_sk int, + cs_quantity int, + cs_wholesale_cost decimalv3(7,2), + cs_list_price decimalv3(7,2), + cs_sales_price decimalv3(7,2), + cs_ext_discount_amt decimalv3(7,2), + cs_ext_sales_price decimalv3(7,2), + cs_ext_wholesale_cost decimalv3(7,2), + cs_ext_list_price decimalv3(7,2), + cs_ext_tax decimalv3(7,2), + cs_coupon_amt decimalv3(7,2), + cs_ext_ship_cost decimalv3(7,2), + cs_net_paid decimalv3(7,2), + cs_net_paid_inc_tax decimalv3(7,2), + cs_net_paid_inc_ship decimalv3(7,2), + cs_net_paid_inc_ship_tax decimalv3(7,2), + cs_net_profit decimalv3(7,2) + ) + DUPLICATE KEY(`cs_sold_date_sk`, `cs_item_sk`, `cs_order_number`) + DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 261 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists call_center + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS call_center ( + cc_call_center_sk int not 
null, + cc_call_center_id varchar(16) not null, + cc_rec_start_date date, + cc_rec_end_date date, + cc_closed_date_sk integer, + cc_open_date_sk integer, + cc_name varchar(50), + cc_class varchar(50), + cc_employees integer, + cc_sq_ft integer, + cc_hours varchar(20), + cc_manager varchar(40), + cc_mkt_id integer, + cc_mkt_class varchar(50), + cc_mkt_desc varchar(100), + cc_market_manager varchar(40), + cc_division integer, + cc_division_name varchar(50), + cc_company integer, + cc_company_name varchar(50), + cc_street_number varchar(10), + cc_street_name varchar(60), + cc_street_type varchar(15), + cc_suite_number varchar(10), + cc_city varchar(60), + cc_county varchar(30), + cc_state varchar(2), + cc_zip varchar(10), + cc_country varchar(20), + cc_gmt_offset decimalv3(5,2), + cc_tax_percentage decimalv3(5,2) + ) + DUPLICATE KEY(cc_call_center_sk) + DISTRIBUTED BY HASH(cc_call_center_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists inventory + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS inventory ( + inv_date_sk int not null, + inv_item_sk int not null, + inv_warehouse_sk int, + inv_quantity_on_hand integer + ) + DUPLICATE KEY(inv_date_sk, inv_item_sk, inv_warehouse_sk) + DISTRIBUTED BY HASH(inv_item_sk) BUCKETS 63 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists catalog_returns + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS catalog_returns ( + cr_returned_date_sk int, + cr_item_sk int not null, + cr_order_number int not null, + cr_returned_time_sk int, + cr_refunded_customer_sk int, + cr_refunded_cdemo_sk int, + cr_refunded_hdemo_sk int, + cr_refunded_addr_sk int, + cr_returning_customer_sk int, + cr_returning_cdemo_sk int, + cr_returning_hdemo_sk int, + cr_returning_addr_sk int, + cr_call_center_sk int, + cr_catalog_page_sk int, + cr_ship_mode_sk int, + cr_warehouse_sk int, + cr_reason_sk int, + cr_return_quantity integer, + cr_return_amount decimalv3(7,2), + cr_return_tax 
decimalv3(7,2), + cr_return_amt_inc_tax decimalv3(7,2), + cr_fee decimalv3(7,2), + cr_return_ship_cost decimalv3(7,2), + cr_refunded_cash decimalv3(7,2), + cr_reversed_charge decimalv3(7,2), + cr_store_credit decimalv3(7,2), + cr_net_loss decimalv3(7,2) + ) + DUPLICATE KEY(`cr_returned_date_sk`, `cr_item_sk`, `cr_order_number`) + DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 36 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists household_demographics + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS household_demographics ( + hd_demo_sk int not null, + hd_income_band_sk int, + hd_buy_potential varchar(15), + hd_dep_count integer, + hd_vehicle_count integer + ) + DUPLICATE KEY(hd_demo_sk) + DISTRIBUTED BY HASH(hd_demo_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists customer_address + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS customer_address ( + ca_address_sk int not null, + ca_address_id varchar(16) not null, + ca_street_number varchar(10), + ca_street_name varchar(60), + ca_street_type varchar(15), + ca_suite_number varchar(10), + ca_city varchar(60), + ca_county varchar(30), + ca_state varchar(2), + ca_zip varchar(10), + ca_country varchar(20), + ca_gmt_offset decimalv3(5,2), + ca_location_type varchar(20) + ) + DUPLICATE KEY(ca_address_sk) + DISTRIBUTED BY HASH(ca_address_sk) BUCKETS 18 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists income_band + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS income_band ( + ib_income_band_sk int not null, + ib_lower_bound integer, + ib_upper_bound integer + ) + DUPLICATE KEY(ib_income_band_sk) + DISTRIBUTED BY HASH(ib_income_band_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists catalog_page + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS catalog_page ( + cp_catalog_page_sk int not null, + cp_catalog_page_id varchar(16) not null, + 
cp_start_date_sk integer, + cp_end_date_sk integer, + cp_department varchar(50), + cp_catalog_number integer, + cp_catalog_page_number integer, + cp_description varchar(100), + cp_type varchar(100) + ) + DUPLICATE KEY(cp_catalog_page_sk) + DISTRIBUTED BY HASH(cp_catalog_page_sk) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists item + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS item ( + i_item_sk int not null, + i_item_id varchar(16) not null, + i_rec_start_date datev2, + i_rec_end_date datev2, + i_item_desc varchar(200), + i_current_price decimalv3(7,2), + i_wholesale_cost decimalv3(7,2), + i_brand_id integer, + i_brand varchar(50), + i_class_id integer, + i_class char(50), + i_category_id integer, + i_category varchar(50), + i_manufact_id integer, + i_manufact varchar(50), + i_size varchar(20), + i_formulation varchar(20), + i_color varchar(20), + i_units varchar(10), + i_container varchar(10), + i_manager_id integer, + i_product_name varchar(50) + ) + DUPLICATE KEY(i_item_sk) + DISTRIBUTED BY HASH(i_item_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_returns + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS web_returns ( + wr_returned_date_sk int, + wr_item_sk int not null, + wr_order_number int not null, + wr_returned_time_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk int, + wr_refunded_hdemo_sk int, + wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_return_quantity integer, + wr_return_amt decimalv3(7,2), + wr_return_tax decimalv3(7,2), + wr_return_amt_inc_tax decimalv3(7,2), + wr_fee decimalv3(7,2), + wr_return_ship_cost decimalv3(7,2), + wr_refunded_cash decimalv3(7,2), + wr_reversed_charge decimalv3(7,2), + wr_account_credit decimalv3(7,2), + wr_net_loss decimalv3(7,2) + ) + DUPLICATE 
KEY(`wr_returned_date_sk`, `wr_item_sk`, `wr_order_number`) + DISTRIBUTED BY HASH(`wr_item_sk`, `wr_order_number`) BUCKETS 18 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_site + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS web_site ( + web_site_sk int not null, + web_site_id varchar(16) not null, + web_rec_start_date datev2, + web_rec_end_date datev2, + web_name varchar(50), + web_open_date_sk int, + web_close_date_sk int, + web_class varchar(50), + web_manager varchar(40), + web_mkt_id integer, + web_mkt_class varchar(50), + web_mkt_desc varchar(100), + web_market_manager varchar(40), + web_company_id integer, + web_company_name varchar(50), + web_street_number varchar(10), + web_street_name varchar(60), + web_street_type varchar(15), + web_suite_number varchar(10), + web_city varchar(60), + web_county varchar(30), + web_state varchar(2), + web_zip varchar(10), + web_country varchar(20), + web_gmt_offset decimalv3(5,2), + web_tax_percentage decimalv3(5,2) + ) + DUPLICATE KEY(web_site_sk) + DISTRIBUTED BY HASH(web_site_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists promotion + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS promotion ( + p_promo_sk int not null, + p_promo_id varchar(16) not null, + p_start_date_sk int, + p_end_date_sk int, + p_item_sk int, + p_cost decimalv3(15,2), + p_response_targe integer, + p_promo_name varchar(50), + p_channel_dmail varchar(1), + p_channel_email varchar(1), + p_channel_catalog varchar(1), + p_channel_tv varchar(1), + p_channel_radio varchar(1), + p_channel_press varchar(1), + p_channel_event varchar(1), + p_channel_demo varchar(1), + p_channel_details varchar(100), + p_purpose varchar(15), + p_discount_active varchar(1) + ) + DUPLICATE KEY(p_promo_sk) + DISTRIBUTED BY HASH(p_promo_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_sales + ''' + + sql ''' + CREATE TABLE IF NOT 
EXISTS web_sales ( + ws_sold_date_sk int, + ws_item_sk int not null, + ws_order_number int not null, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_quantity integer, + ws_wholesale_cost decimalv3(7,2), + ws_list_price decimalv3(7,2), + ws_sales_price decimalv3(7,2), + ws_ext_discount_amt decimalv3(7,2), + ws_ext_sales_price decimalv3(7,2), + ws_ext_wholesale_cost decimalv3(7,2), + ws_ext_list_price decimalv3(7,2), + ws_ext_tax decimalv3(7,2), + ws_coupon_amt decimalv3(7,2), + ws_ext_ship_cost decimalv3(7,2), + ws_net_paid decimalv3(7,2), + ws_net_paid_inc_tax decimalv3(7,2), + ws_net_paid_inc_ship decimalv3(7,2), + ws_net_paid_inc_ship_tax decimalv3(7,2), + ws_net_profit decimalv3(7,2) + ) + DUPLICATE KEY(`ws_sold_date_sk`, `ws_item_sk`, `ws_order_number`) + DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 126 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists store + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS store ( + s_store_sk int not null, + s_store_id varchar(16) not null, + s_rec_start_date datev2, + s_rec_end_date datev2, + s_closed_date_sk int, + s_store_name varchar(50), + s_number_employees integer, + s_floor_space integer, + s_hours varchar(20), + s_manager varchar(40), + s_market_id integer, + s_geography_class varchar(100), + s_market_desc varchar(100), + s_market_manager varchar(40), + s_division_id integer, + s_division_name varchar(50), + s_company_id integer, + s_company_name varchar(50), + s_street_number varchar(10), + s_street_name varchar(60), + s_street_type varchar(15), + s_suite_number varchar(10), + s_city varchar(60), + s_county varchar(30), + s_state varchar(2), + s_zip varchar(10), + 
s_country varchar(20), + s_gmt_offset decimalv3(5,2), + s_tax_percentage decimalv3(5,2) + ) + DUPLICATE KEY(s_store_sk) + DISTRIBUTED BY HASH(s_store_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists time_dim + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS time_dim ( + t_time_sk int not null, + t_time_id varchar(16) not null, + t_time integer, + t_hour integer, + t_minute integer, + t_second integer, + t_am_pm varchar(2), + t_shift varchar(20), + t_sub_shift varchar(20), + t_meal_time varchar(20) + ) + DUPLICATE KEY(t_time_sk) + DISTRIBUTED BY HASH(t_time_sk) BUCKETS 9 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists web_page + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS web_page ( + wp_web_page_sk int not null, + wp_web_page_id varchar(16) not null, + wp_rec_start_date datev2, + wp_rec_end_date datev2, + wp_creation_date_sk int, + wp_access_date_sk int, + wp_autogen_flag varchar(1), + wp_customer_sk int, + wp_url varchar(100), + wp_type varchar(50), + wp_char_count integer, + wp_link_count integer, + wp_image_count integer, + wp_max_ad_count integer + ) + DUPLICATE KEY(wp_web_page_sk) + DISTRIBUTED BY HASH(wp_web_page_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists store_returns + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS store_returns ( + sr_returned_date_sk int, + sr_item_sk int not null, + sr_ticket_number int not null, + sr_return_time_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_return_quantity integer, + sr_return_amt decimalv3(7,2), + sr_return_tax decimalv3(7,2), + sr_return_amt_inc_tax decimalv3(7,2), + sr_fee decimalv3(7,2), + sr_return_ship_cost decimalv3(7,2), + sr_refunded_cash decimalv3(7,2), + sr_reversed_charge decimalv3(7,2), + sr_store_credit decimalv3(7,2), + sr_net_loss decimalv3(7,2) + ) + duplicate 
key(`sr_returned_date_sk`, `sr_item_sk`, `sr_ticket_number`) + distributed by hash (sr_item_sk, sr_ticket_number) buckets 36 + properties ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists store_sales + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS store_sales ( + ss_sold_date_sk int, + ss_item_sk int not null, + ss_ticket_number int not null, + ss_sold_time_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_quantity integer, + ss_wholesale_cost decimalv3(7,2), + ss_list_price decimalv3(7,2), + ss_sales_price decimalv3(7,2), + ss_ext_discount_amt decimalv3(7,2), + ss_ext_sales_price decimalv3(7,2), + ss_ext_wholesale_cost decimalv3(7,2), + ss_ext_list_price decimalv3(7,2), + ss_ext_tax decimalv3(7,2), + ss_coupon_amt decimalv3(7,2), + ss_net_paid decimalv3(7,2), + ss_net_paid_inc_tax decimalv3(7,2), + ss_net_profit decimalv3(7,2) + ) + DUPLICATE KEY(`ss_sold_date_sk`, `ss_item_sk`, `ss_ticket_number`) + DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 261 + PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "store" + ) + ''' + + sql ''' + drop table if exists ship_mode + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS ship_mode ( + sm_ship_mode_sk int not null, + sm_ship_mode_id varchar(16) not null, + sm_type varchar(30), + sm_code varchar(10), + sm_carrier varchar(20), + sm_contract varchar(20) + ) + DUPLICATE KEY(sm_ship_mode_sk) + DISTRIBUTED BY HASH(sm_ship_mode_sk) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists customer + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS customer ( + c_customer_sk int not null, + c_customer_id varchar(16) not null, + c_current_cdemo_sk int, + c_current_hdemo_sk int, + c_current_addr_sk int, + c_first_shipto_date_sk int, + c_first_sales_date_sk int, + c_salutation varchar(10), + c_first_name varchar(20), + c_last_name varchar(30), + c_preferred_cust_flag varchar(1), + 
c_birth_day integer, + c_birth_month integer, + c_birth_year integer, + c_birth_country varchar(20), + c_login varchar(13), + c_email_address varchar(50), + c_last_review_date_sk int + ) + DUPLICATE KEY(c_customer_sk) + DISTRIBUTED BY HASH(c_customer_sk) BUCKETS 18 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + drop table if exists dbgen_version + ''' + + sql ''' + CREATE TABLE IF NOT EXISTS dbgen_version + ( + dv_version varchar(16) , + dv_create_date datev2 , + dv_create_time datetime , + dv_cmdline_args varchar(200) + ) + DUPLICATE KEY(dv_version) + DISTRIBUTED BY HASH(dv_version) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + ''' + + sql ''' + alter table customer add constraint customer_pk primary key (c_customer_sk); + ''' + + sql ''' + alter table customer add constraint customer_uk unique (c_customer_id); + ''' + + sql ''' + alter table store_sales add constraint ss_fk foreign key(ss_customer_sk) references customer(c_customer_sk); + ''' + + sql ''' + alter table web_sales add constraint ws_fk foreign key(ws_bill_customer_sk) references customer(c_customer_sk); + ''' + + sql ''' + alter table catalog_sales add constraint cs_fk foreign key(cs_bill_customer_sk) references customer(c_customer_sk); + ''' + + sql """ + alter table customer_demographics modify column cd_dep_employed_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') + """ + + sql """ + alter table date_dim modify column d_day_name set stats ('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='Friday', 'max_value'='Wednesday', 'data_size'='521779') + """ + + sql """ + alter table date_dim modify column d_following_holiday set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_same_day_ly set stats ('row_count'='73049', 'ndv'='72450', 'num_nulls'='0', 'min_value'='2414657', 
'max_value'='2487705', 'data_size'='292196') + """ + + sql """ + alter table warehouse modify column w_city set stats ('row_count'='20', 'ndv'='12', 'num_nulls'='0', 'min_value'='Fairview', 'max_value'='Shiloh', 'data_size'='183') + """ + + sql """ + alter table warehouse modify column w_street_type set stats ('row_count'='20', 'ndv'='14', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='71') + """ + + sql """ + alter table catalog_sales modify column cs_call_center_sk set stats ('row_count'='1439980416', 'ndv'='42', 'num_nulls'='7199711', 'min_value'='1', 'max_value'='42', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid_inc_ship set stats ('row_count'='1439980416', 'ndv'='2505826', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='43956.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_sales_price set stats ('row_count'='1439980416', 'ndv'='29306', 'num_nulls'='7200276', 'min_value'='0.00', 'max_value'='300.00', 'data_size'='5759921664') + """ + + sql """ + alter table call_center modify column cc_class set stats ('row_count'='42', 'ndv'='3', 'num_nulls'='0', 'min_value'='large', 'max_value'='small', 'data_size'='226') + """ + + sql """ + alter table call_center modify column cc_country set stats ('row_count'='42', 'ndv'='1', 'num_nulls'='0', 'min_value'='United States', 'max_value'='United States', 'data_size'='546') + """ + + sql """ + alter table call_center modify column cc_county set stats ('row_count'='42', 'ndv'='16', 'num_nulls'='0', 'min_value'='Barrow County', 'max_value'='Williamson County', 'data_size'='627') + """ + + sql """ + alter table call_center modify column cc_mkt_class set stats ('row_count'='42', 'ndv'='36', 'num_nulls'='0', 'min_value'='A bit narrow forms matter animals. 
Consist', 'max_value'='Yesterday new men can make moreov', 'data_size'='1465') + """ + + sql """ + alter table call_center modify column cc_sq_ft set stats ('row_count'='42', 'ndv'='31', 'num_nulls'='0', 'min_value'='-1890660328', 'max_value'='2122480316', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_state set stats ('row_count'='42', 'ndv'='14', 'num_nulls'='0', 'min_value'='FL', 'max_value'='WV', 'data_size'='84') + """ + + sql """ + alter table inventory modify column inv_warehouse_sk set stats ('row_count'='783000000', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='6264000000') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_addr_sk set stats ('row_count'='143996756', 'ndv'='6015811', 'num_nulls'='2881609', 'min_value'='1', 'max_value'='6000000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_cash set stats ('row_count'='143996756', 'ndv'='1107525', 'num_nulls'='2879192', 'min_value'='0.00', 'max_value'='26955.24', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_cdemo_sk set stats ('row_count'='143996756', 'ndv'='1916366', 'num_nulls'='2881314', 'min_value'='1', 'max_value'='1920800', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_return_amt_inc_tax set stats ('row_count'='143996756', 'ndv'='1544502', 'num_nulls'='2881886', 'min_value'='0.00', 'max_value'='30418.06', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returning_addr_sk set stats ('row_count'='143996756', 'ndv'='6015811', 'num_nulls'='2883215', 'min_value'='1', 'max_value'='6000000', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_buy_potential set stats ('row_count'='7200', 'ndv'='6', 'num_nulls'='0', 'min_value'='0-500', 'max_value'='Unknown', 'data_size'='54000') + """ + 
+ sql """ + alter table customer_address modify column ca_address_id set stats ('row_count'='6000000', 'ndv'='5984931', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAABAA', 'max_value'='AAAAAAAAPPPPPEAA', 'data_size'='96000000') + """ + + sql """ + alter table customer_address modify column ca_address_sk set stats ('row_count'='6000000', 'ndv'='6015811', 'num_nulls'='0', 'min_value'='1', 'max_value'='6000000', 'data_size'='48000000') + """ + + sql """ + alter table customer_address modify column ca_country set stats ('row_count'='6000000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='75661794') + """ + + sql """ + alter table customer_address modify column ca_location_type set stats ('row_count'='6000000', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='single family', 'data_size'='52372545') + """ + + sql """ + alter table customer_address modify column ca_street_number set stats ('row_count'='6000000', 'ndv'='1002', 'num_nulls'='0', 'min_value'='', 'max_value'='999', 'data_size'='16837336') + """ + + sql """ + alter table customer_address modify column ca_suite_number set stats ('row_count'='6000000', 'ndv'='76', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite Y', 'data_size'='45911575') + """ + + sql """ + alter table catalog_page modify column cp_catalog_page_id set stats ('row_count'='30000', 'ndv'='29953', 'num_nulls'='0', 'min_value'='AAAAAAAAAAABAAAA', 'max_value'='AAAAAAAAPPPGAAAA', 'data_size'='480000') + """ + + sql """ + alter table item modify column i_rec_end_date set stats ('row_count'='300000', 'ndv'='3', 'num_nulls'='150000', 'min_value'='1999-10-27', 'max_value'='2001-10-26', 'data_size'='1200000') + """ + + sql """ + alter table web_returns modify column wr_refunded_addr_sk set stats ('row_count'='71997522', 'ndv'='6015811', 'num_nulls'='3239971', 'min_value'='1', 'max_value'='6000000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_reversed_charge set stats 
('row_count'='71997522', 'ndv'='692680', 'num_nulls'='3239546', 'min_value'='0.00', 'max_value'='23194.77', 'data_size'='287990088') + """ + + sql """ + alter table web_site modify column web_state set stats ('row_count'='54', 'ndv'='18', 'num_nulls'='0', 'min_value'='AL', 'max_value'='WV', 'data_size'='108') + """ + + sql """ + alter table promotion modify column p_end_date_sk set stats ('row_count'='1500', 'ndv'='683', 'num_nulls'='18', 'min_value'='2450113', 'max_value'='2450967', 'data_size'='12000') + """ + + sql """ + alter table web_sales modify column ws_bill_hdemo_sk set stats ('row_count'='720000376', 'ndv'='7251', 'num_nulls'='180139', 'min_value'='1', 'max_value'='7200', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ext_ship_cost set stats ('row_count'='720000376', 'ndv'='567477', 'num_nulls'='180084', 'min_value'='0.00', 'max_value'='14950.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ship_addr_sk set stats ('row_count'='720000376', 'ndv'='6015811', 'num_nulls'='179848', 'min_value'='1', 'max_value'='6000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ship_mode_sk set stats ('row_count'='720000376', 'ndv'='20', 'num_nulls'='180017', 'min_value'='1', 'max_value'='20', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_warehouse_sk set stats ('row_count'='720000376', 'ndv'='20', 'num_nulls'='180105', 'min_value'='1', 'max_value'='20', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_company_name set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6965') + """ + + sql """ + alter table store modify column s_gmt_offset set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='6', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_manager set stats 
('row_count'='1002', 'ndv'='739', 'num_nulls'='0', 'min_value'='', 'max_value'='Zane Clifton', 'data_size'='12649') + """ + + sql """ + alter table store modify column s_street_number set stats ('row_count'='1002', 'ndv'='521', 'num_nulls'='0', 'min_value'='', 'max_value'='999', 'data_size'='2874') + """ + + sql """ + alter table time_dim modify column t_meal_time set stats ('row_count'='86400', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='lunch', 'data_size'='248400') + """ + + sql """ + alter table time_dim modify column t_time set stats ('row_count'='86400', 'ndv'='86684', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='345600') + """ + + sql """ + alter table web_page modify column wp_creation_date_sk set stats ('row_count'='3000', 'ndv'='199', 'num_nulls'='33', 'min_value'='2450604', 'max_value'='2450815', 'data_size'='24000') + """ + + sql """ + alter table web_page modify column wp_customer_sk set stats ('row_count'='3000', 'ndv'='713', 'num_nulls'='2147', 'min_value'='9522', 'max_value'='11995685', 'data_size'='24000') + """ + + sql """ + alter table web_page modify column wp_max_ad_count set stats ('row_count'='3000', 'ndv'='5', 'num_nulls'='31', 'min_value'='0', 'max_value'='4', 'data_size'='12000') + """ + + sql """ + alter table web_page modify column wp_url set stats ('row_count'='3000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='http://www.foo.com', 'data_size'='53406') + """ + + sql """ + alter table store_returns modify column sr_refunded_cash set stats ('row_count'='287999764', 'ndv'='928470', 'num_nulls'='10081294', 'min_value'='0.00', 'max_value'='18173.96', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_tax set stats ('row_count'='287999764', 'ndv'='117247', 'num_nulls'='10081332', 'min_value'='0.00', 'max_value'='1682.04', 'data_size'='1151999056') + """ + + sql """ + alter table store_sales modify column ss_customer_sk set stats 
('row_count'='2879987999', 'ndv'='12157481', 'num_nulls'='129590766', 'min_value'='1', 'max_value'='12000000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_hdemo_sk set stats ('row_count'='2879987999', 'ndv'='7251', 'num_nulls'='129594559', 'min_value'='1', 'max_value'='7200', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_store_sk set stats ('row_count'='2879987999', 'ndv'='499', 'num_nulls'='129572050', 'min_value'='1', 'max_value'='1000', 'data_size'='23039903992') + """ + + sql """ + alter table ship_mode modify column sm_ship_mode_id set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPAAAAAAA', 'data_size'='320') + """ + + sql """ + alter table ship_mode modify column sm_ship_mode_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') + """ + + sql """ + alter table customer modify column c_first_name set stats ('row_count'='12000000', 'ndv'='5140', 'num_nulls'='0', 'min_value'='', 'max_value'='Zulma', 'data_size'='67593278') + """ + + sql """ + alter table customer modify column c_first_sales_date_sk set stats ('row_count'='12000000', 'ndv'='3644', 'num_nulls'='419856', 'min_value'='2448998', 'max_value'='2452648', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_first_shipto_date_sk set stats ('row_count'='12000000', 'ndv'='3644', 'num_nulls'='420769', 'min_value'='2449028', 'max_value'='2452678', 'data_size'='96000000') + """ + + sql """ + alter table customer_demographics modify column cd_dep_college_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') + """ + + sql """ + alter table date_dim modify column d_dow set stats ('row_count'='73049', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='292196') + """ + + sql """ + alter 
table date_dim modify column d_fy_quarter_seq set stats ('row_count'='73049', 'ndv'='801', 'num_nulls'='0', 'min_value'='1', 'max_value'='801', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_qoy set stats ('row_count'='73049', 'ndv'='4', 'num_nulls'='0', 'min_value'='1', 'max_value'='4', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_quarter_seq set stats ('row_count'='73049', 'ndv'='801', 'num_nulls'='0', 'min_value'='1', 'max_value'='801', 'data_size'='292196') + """ + + sql """ + alter table warehouse modify column w_street_name set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='', 'max_value'='Wilson Elm', 'data_size'='176') + """ + + sql """ + alter table warehouse modify column w_suite_number set stats ('row_count'='20', 'ndv'='18', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite X', 'data_size'='150') + """ + + sql """ + alter table catalog_sales modify column cs_bill_cdemo_sk set stats ('row_count'='1439980416', 'ndv'='1916366', 'num_nulls'='7202134', 'min_value'='1', 'max_value'='1920800', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_bill_hdemo_sk set stats ('row_count'='1439980416', 'ndv'='7251', 'num_nulls'='7198837', 'min_value'='1', 'max_value'='7200', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ext_ship_cost set stats ('row_count'='1439980416', 'ndv'='573238', 'num_nulls'='7202537', 'min_value'='0.00', 'max_value'='14994.00', 'data_size'='5759921664') + """ + + sql """ + alter table call_center modify column cc_name set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='California', 'max_value'='Pacific Northwest_2', 'data_size'='572') + """ + + sql """ + alter table call_center modify column cc_street_name set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='1st', 'max_value'='Willow', 'data_size'='356') + """ + + sql """ + alter table 
call_center modify column cc_zip set stats ('row_count'='42', 'ndv'='19', 'num_nulls'='0', 'min_value'='18605', 'max_value'='98048', 'data_size'='210') + """ + + sql """ + alter table inventory modify column inv_quantity_on_hand set stats ('row_count'='783000000', 'ndv'='1006', 'num_nulls'='39153758', 'min_value'='0', 'max_value'='1000', 'data_size'='3132000000') + """ + + sql """ + alter table catalog_returns modify column cr_catalog_page_sk set stats ('row_count'='143996756', 'ndv'='17005', 'num_nulls'='2882502', 'min_value'='1', 'max_value'='25207', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_income_band_sk set stats ('row_count'='7200', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='57600') + """ + + sql """ + alter table catalog_page modify column cp_description set stats ('row_count'='30000', 'ndv'='30141', 'num_nulls'='0', 'min_value'='', 'max_value'='Youngsters worry both workers. Fascinating characters take cheap never alive studies. 
Direct, old', 'data_size'='2215634') + """ + + sql """ + alter table item modify column i_item_id set stats ('row_count'='300000', 'ndv'='150851', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAABAAA', 'max_value'='AAAAAAAAPPPPBAAA', 'data_size'='4800000') + """ + + sql """ + alter table web_returns modify column wr_account_credit set stats ('row_count'='71997522', 'ndv'='683955', 'num_nulls'='3241972', 'min_value'='0.00', 'max_value'='23166.33', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_net_loss set stats ('row_count'='71997522', 'ndv'='815608', 'num_nulls'='3240573', 'min_value'='0.50', 'max_value'='15887.84', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_return_amt set stats ('row_count'='71997522', 'ndv'='808311', 'num_nulls'='3238405', 'min_value'='0.00', 'max_value'='29191.00', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_return_amt_inc_tax set stats ('row_count'='71997522', 'ndv'='1359913', 'num_nulls'='3239765', 'min_value'='0.00', 'max_value'='30393.01', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_return_quantity set stats ('row_count'='71997522', 'ndv'='100', 'num_nulls'='3238643', 'min_value'='1', 'max_value'='100', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_returning_addr_sk set stats ('row_count'='71997522', 'ndv'='6015811', 'num_nulls'='3239658', 'min_value'='1', 'max_value'='6000000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_returning_customer_sk set stats ('row_count'='71997522', 'ndv'='12119220', 'num_nulls'='3237281', 'min_value'='1', 'max_value'='12000000', 'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_mkt_desc set stats ('row_count'='54', 'ndv'='38', 'num_nulls'='0', 'min_value'='Acres see else children. Mutual too', 'max_value'='Windows increase to a differences. 
Other parties might in', 'data_size'='3473') + """ + + sql """ + alter table web_site modify column web_mkt_id set stats ('row_count'='54', 'ndv'='6', 'num_nulls'='1', 'min_value'='1', 'max_value'='6', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_rec_end_date set stats ('row_count'='54', 'ndv'='3', 'num_nulls'='27', 'min_value'='1999-08-16', 'max_value'='2001-08-15', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_site_id set stats ('row_count'='54', 'ndv'='27', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPBAAAAAA', 'data_size'='864') + """ + + sql """ + alter table web_site modify column web_street_type set stats ('row_count'='54', 'ndv'='20', 'num_nulls'='0', 'min_value'='Ave', 'max_value'='Wy', 'data_size'='208') + """ + + sql """ + alter table promotion modify column p_channel_demo set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1479') + """ + + sql """ + alter table promotion modify column p_channel_details set stats ('row_count'='1500', 'ndv'='1490', 'num_nulls'='0', 'min_value'='', 'max_value'='Young, valuable companies watch walls. 
Payments can flour', 'data_size'='59126') + """ + + sql """ + alter table promotion modify column p_channel_event set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1482') + """ + + sql """ + alter table promotion modify column p_discount_active set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1473') + """ + + sql """ + alter table promotion modify column p_promo_sk set stats ('row_count'='1500', 'ndv'='1489', 'num_nulls'='0', 'min_value'='1', 'max_value'='1500', 'data_size'='12000') + """ + + sql """ + alter table promotion modify column p_purpose set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='10374') + """ + + sql """ + alter table web_sales modify column ws_bill_cdemo_sk set stats ('row_count'='720000376', 'ndv'='1916366', 'num_nulls'='179788', 'min_value'='1', 'max_value'='1920800', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_sold_date_sk set stats ('row_count'='720000376', 'ndv'='1820', 'num_nulls'='179921', 'min_value'='2450816', 'max_value'='2452642', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_web_site_sk set stats ('row_count'='720000376', 'ndv'='54', 'num_nulls'='179930', 'min_value'='1', 'max_value'='54', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_city set stats ('row_count'='1002', 'ndv'='55', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodlawn', 'data_size'='9238') + """ + + sql """ + alter table store modify column s_company_id set stats ('row_count'='1002', 'ndv'='1', 'num_nulls'='7', 'min_value'='1', 'max_value'='1', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_county set stats ('row_count'='1002', 'ndv'='28', 'num_nulls'='0', 'min_value'='', 'max_value'='Ziebach County', 'data_size'='14291') + """ + + sql """ + alter table store 
modify column s_geography_class set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6972') + """ + + sql """ + alter table store modify column s_hours set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='8AM-8AM', 'data_size'='7088') + """ + + sql """ + alter table store modify column s_store_id set stats ('row_count'='1002', 'ndv'='501', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPBAAAAA', 'data_size'='16032') + """ + + sql """ + alter table store modify column s_zip set stats ('row_count'='1002', 'ndv'='354', 'num_nulls'='0', 'min_value'='', 'max_value'='99454', 'data_size'='4975') + """ + + sql """ + alter table time_dim modify column t_am_pm set stats ('row_count'='86400', 'ndv'='2', 'num_nulls'='0', 'min_value'='AM', 'max_value'='PM', 'data_size'='172800') + """ + + sql """ + alter table time_dim modify column t_minute set stats ('row_count'='86400', 'ndv'='60', 'num_nulls'='0', 'min_value'='0', 'max_value'='59', 'data_size'='345600') + """ + + sql """ + alter table web_page modify column wp_web_page_id set stats ('row_count'='3000', 'ndv'='1501', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPKAAAAA', 'data_size'='48000') + """ + + sql """ + alter table web_page modify column wp_web_page_sk set stats ('row_count'='3000', 'ndv'='2984', 'num_nulls'='0', 'min_value'='1', 'max_value'='3000', 'data_size'='24000') + """ + + sql """ + alter table store_returns modify column sr_return_amt set stats ('row_count'='287999764', 'ndv'='671228', 'num_nulls'='10080055', 'min_value'='0.00', 'max_value'='19434.00', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_returned_date_sk set stats ('row_count'='287999764', 'ndv'='2010', 'num_nulls'='10079607', 'min_value'='2450820', 'max_value'='2452822', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_ext_tax 
set stats ('row_count'='2879987999', 'ndv'='149597', 'num_nulls'='129588732', 'min_value'='0.00', 'max_value'='1797.48', 'data_size'='11519951996') + """ + + sql """ + alter table customer modify column c_current_cdemo_sk set stats ('row_count'='12000000', 'ndv'='1913901', 'num_nulls'='419895', 'min_value'='1', 'max_value'='1920800', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_customer_id set stats ('row_count'='12000000', 'ndv'='11921032', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAABAA', 'max_value'='AAAAAAAAPPPPPKAA', 'data_size'='192000000') + """ + + sql """ + alter table date_dim modify column d_current_day set stats ('row_count'='73049', 'ndv'='1', 'num_nulls'='0', 'min_value'='N', 'max_value'='N', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_current_month set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_date set stats ('row_count'='73049', 'ndv'='73250', 'num_nulls'='0', 'min_value'='1900-01-02', 'max_value'='2100-01-01', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_moy set stats ('row_count'='73049', 'ndv'='12', 'num_nulls'='0', 'min_value'='1', 'max_value'='12', 'data_size'='292196') + """ + + sql """ + alter table warehouse modify column w_gmt_offset set stats ('row_count'='20', 'ndv'='3', 'num_nulls'='1', 'min_value'='-7.00', 'max_value'='-5.00', 'data_size'='80') + """ + + sql """ + alter table warehouse modify column w_warehouse_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') + """ + + sql """ + alter table warehouse modify column w_warehouse_sq_ft set stats ('row_count'='20', 'ndv'='19', 'num_nulls'='1', 'min_value'='73065', 'max_value'='977787', 'data_size'='80') + """ + + sql """ + alter table catalog_sales modify column cs_ext_sales_price set stats 
('row_count'='1439980416', 'ndv'='1100662', 'num_nulls'='7199625', 'min_value'='0.00', 'max_value'='29943.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ext_wholesale_cost set stats ('row_count'='1439980416', 'ndv'='393180', 'num_nulls'='7199876', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_item_sk set stats ('row_count'='1439980416', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid_inc_tax set stats ('row_count'='1439980416', 'ndv'='2422238', 'num_nulls'='7200702', 'min_value'='0.00', 'max_value'='32376.27', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ship_date_sk set stats ('row_count'='1439980416', 'ndv'='1933', 'num_nulls'='7200707', 'min_value'='2450817', 'max_value'='2452744', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_warehouse_sk set stats ('row_count'='1439980416', 'ndv'='20', 'num_nulls'='7200688', 'min_value'='1', 'max_value'='20', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_division set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_division_name set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='able', 'max_value'='pri', 'data_size'='164') + """ + + sql """ + alter table call_center modify column cc_manager set stats ('row_count'='42', 'ndv'='28', 'num_nulls'='0', 'min_value'='Alden Snyder', 'max_value'='Wayne Ray', 'data_size'='519') + """ + + sql """ + alter table call_center modify column cc_rec_start_date set stats ('row_count'='42', 'ndv'='4', 'num_nulls'='0', 'min_value'='1998-01-01', 'max_value'='2002-01-01', 
'data_size'='168') + """ + + sql """ + alter table catalog_returns modify column cr_call_center_sk set stats ('row_count'='143996756', 'ndv'='42', 'num_nulls'='2881668', 'min_value'='1', 'max_value'='42', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_net_loss set stats ('row_count'='143996756', 'ndv'='911034', 'num_nulls'='2881704', 'min_value'='0.50', 'max_value'='16095.08', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_customer_sk set stats ('row_count'='143996756', 'ndv'='12156363', 'num_nulls'='2879017', 'min_value'='1', 'max_value'='12000000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_refunded_hdemo_sk set stats ('row_count'='143996756', 'ndv'='7251', 'num_nulls'='2882107', 'min_value'='1', 'max_value'='7200', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_returning_customer_sk set stats ('row_count'='143996756', 'ndv'='12157481', 'num_nulls'='2879023', 'min_value'='1', 'max_value'='12000000', 'data_size'='1151974048') + """ + + sql """ + alter table customer_address modify column ca_gmt_offset set stats ('row_count'='6000000', 'ndv'='6', 'num_nulls'='180219', 'min_value'='-10.00', 'max_value'='-5.00', 'data_size'='24000000') + """ + + sql """ + alter table item modify column i_color set stats ('row_count'='300000', 'ndv'='93', 'num_nulls'='0', 'min_value'='', 'max_value'='yellow', 'data_size'='1610293') + """ + + sql """ + alter table item modify column i_manufact set stats ('row_count'='300000', 'ndv'='1004', 'num_nulls'='0', 'min_value'='', 'max_value'='pripripri', 'data_size'='3379693') + """ + + sql """ + alter table item modify column i_product_name set stats ('row_count'='300000', 'ndv'='294994', 'num_nulls'='0', 'min_value'='', 'max_value'='pripripripripriought', 'data_size'='6849199') + """ + + sql """ + alter table web_returns modify column wr_returned_time_sk 
set stats ('row_count'='71997522', 'ndv'='87677', 'num_nulls'='3238574', 'min_value'='0', 'max_value'='86399', 'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_manager set stats ('row_count'='54', 'ndv'='40', 'num_nulls'='0', 'min_value'='', 'max_value'='William Young', 'data_size'='658') + """ + + sql """ + alter table web_site modify column web_mkt_class set stats ('row_count'='54', 'ndv'='40', 'num_nulls'='0', 'min_value'='', 'max_value'='Written, political plans show to the models. T', 'data_size'='1822') + """ + + sql """ + alter table web_site modify column web_rec_start_date set stats ('row_count'='54', 'ndv'='4', 'num_nulls'='2', 'min_value'='1997-08-16', 'max_value'='2001-08-16', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_street_number set stats ('row_count'='54', 'ndv'='36', 'num_nulls'='0', 'min_value'='', 'max_value'='983', 'data_size'='154') + """ + + sql """ + alter table promotion modify column p_channel_catalog set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1482') + """ + + sql """ + alter table promotion modify column p_promo_id set stats ('row_count'='1500', 'ndv'='1519', 'num_nulls'='0', 'min_value'='AAAAAAAAAABAAAAA', 'max_value'='AAAAAAAAPPEAAAAA', 'data_size'='24000') + """ + + sql """ + alter table web_sales modify column ws_bill_customer_sk set stats ('row_count'='720000376', 'ndv'='12103729', 'num_nulls'='179817', 'min_value'='1', 'max_value'='12000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_list_price set stats ('row_count'='720000376', 'ndv'='29396', 'num_nulls'='180053', 'min_value'='1.00', 'max_value'='300.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_sales_price set stats ('row_count'='720000376', 'ndv'='29288', 'num_nulls'='180005', 'min_value'='0.00', 'max_value'='300.00', 'data_size'='2880001504') + """ + + sql """ + 
alter table web_sales modify column ws_ship_hdemo_sk set stats ('row_count'='720000376', 'ndv'='7251', 'num_nulls'='179824', 'min_value'='1', 'max_value'='7200', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_closed_date_sk set stats ('row_count'='1002', 'ndv'='163', 'num_nulls'='729', 'min_value'='2450820', 'max_value'='2451313', 'data_size'='8016') + """ + + sql """ + alter table store modify column s_division_id set stats ('row_count'='1002', 'ndv'='1', 'num_nulls'='6', 'min_value'='1', 'max_value'='1', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_market_desc set stats ('row_count'='1002', 'ndv'='765', 'num_nulls'='0', 'min_value'='', 'max_value'='Yesterday left factors handle continuing co', 'data_size'='57638') + """ + + sql """ + alter table store modify column s_market_id set stats ('row_count'='1002', 'ndv'='10', 'num_nulls'='8', 'min_value'='1', 'max_value'='10', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_state set stats ('row_count'='1002', 'ndv'='22', 'num_nulls'='0', 'min_value'='', 'max_value'='WV', 'data_size'='1994') + """ + + sql """ + alter table store modify column s_store_sk set stats ('row_count'='1002', 'ndv'='988', 'num_nulls'='0', 'min_value'='1', 'max_value'='1002', 'data_size'='8016') + """ + + sql """ + alter table store modify column s_street_name set stats ('row_count'='1002', 'ndv'='549', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodland Oak', 'data_size'='8580') + """ + + sql """ + alter table web_page modify column wp_access_date_sk set stats ('row_count'='3000', 'ndv'='101', 'num_nulls'='31', 'min_value'='2452548', 'max_value'='2452648', 'data_size'='24000') + """ + + sql """ + alter table web_page modify column wp_char_count set stats ('row_count'='3000', 'ndv'='1883', 'num_nulls'='42', 'min_value'='303', 'max_value'='8523', 'data_size'='12000') + """ + + sql """ + alter table store_returns modify column sr_addr_sk set stats 
('row_count'='287999764', 'ndv'='6015811', 'num_nulls'='10082311', 'min_value'='1', 'max_value'='6000000', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_return_time_sk set stats ('row_count'='287999764', 'ndv'='32660', 'num_nulls'='10082805', 'min_value'='28799', 'max_value'='61199', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_store_sk set stats ('row_count'='287999764', 'ndv'='499', 'num_nulls'='10081871', 'min_value'='1', 'max_value'='1000', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_coupon_amt set stats ('row_count'='2879987999', 'ndv'='1161208', 'num_nulls'='129609101', 'min_value'='0.00', 'max_value'='19778.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_sales_price set stats ('row_count'='2879987999', 'ndv'='19780', 'num_nulls'='129598061', 'min_value'='0.00', 'max_value'='200.00', 'data_size'='11519951996') + """ + + sql """ + alter table customer modify column c_birth_country set stats ('row_count'='12000000', 'ndv'='211', 'num_nulls'='0', 'min_value'='', 'max_value'='ZIMBABWE', 'data_size'='100750845') + """ + + sql """ + alter table customer modify column c_birth_month set stats ('row_count'='12000000', 'ndv'='12', 'num_nulls'='419629', 'min_value'='1', 'max_value'='12', 'data_size'='48000000') + """ + + sql """ + alter table customer modify column c_customer_sk set stats ('row_count'='12000000', 'ndv'='12157481', 'num_nulls'='0', 'min_value'='1', 'max_value'='12000000', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_email_address set stats ('row_count'='12000000', 'ndv'='11642077', 'num_nulls'='0', 'min_value'='', 'max_value'='Zulma.Young@aDhzZzCzYN.edu', 'data_size'='318077849') + """ + + sql """ + alter table customer modify column c_last_review_date_sk set stats ('row_count'='12000000', 'ndv'='366', 'num_nulls'='419900', 'min_value'='2452283', 
'max_value'='2452648', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_preferred_cust_flag set stats ('row_count'='12000000', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='11580510') + """ + + sql """ + alter table dbgen_version modify column dv_version set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='3.2.0', 'max_value'='3.2.0', 'data_size'='5') + """ + + sql """ + alter table customer_demographics modify column cd_purchase_estimate set stats ('row_count'='1920800', 'ndv'='20', 'num_nulls'='0', 'min_value'='500', 'max_value'='10000', 'data_size'='7683200') + """ + + sql """ + alter table reason modify column r_reason_id set stats ('row_count'='65', 'ndv'='65', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPDAAAAAA', 'data_size'='1040') + """ + + sql """ + alter table reason modify column r_reason_sk set stats ('row_count'='65', 'ndv'='65', 'num_nulls'='0', 'min_value'='1', 'max_value'='65', 'data_size'='520') + """ + + sql """ + alter table date_dim modify column d_current_week set stats ('row_count'='73049', 'ndv'='1', 'num_nulls'='0', 'min_value'='N', 'max_value'='N', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_first_dom set stats ('row_count'='73049', 'ndv'='2410', 'num_nulls'='0', 'min_value'='2415021', 'max_value'='2488070', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_fy_year set stats ('row_count'='73049', 'ndv'='202', 'num_nulls'='0', 'min_value'='1900', 'max_value'='2100', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_last_dom set stats ('row_count'='73049', 'ndv'='2419', 'num_nulls'='0', 'min_value'='2415020', 'max_value'='2488372', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_month_seq set stats ('row_count'='73049', 'ndv'='2398', 'num_nulls'='0', 'min_value'='0', 'max_value'='2400', 'data_size'='292196') + """ + + 
sql """ + alter table date_dim modify column d_quarter_name set stats ('row_count'='73049', 'ndv'='799', 'num_nulls'='0', 'min_value'='1900Q1', 'max_value'='2100Q1', 'data_size'='438294') + """ + + sql """ + alter table warehouse modify column w_county set stats ('row_count'='20', 'ndv'='14', 'num_nulls'='0', 'min_value'='Bronx County', 'max_value'='Ziebach County', 'data_size'='291') + """ + + sql """ + alter table warehouse modify column w_street_number set stats ('row_count'='20', 'ndv'='19', 'num_nulls'='0', 'min_value'='', 'max_value'='957', 'data_size'='54') + """ + + sql """ + alter table warehouse modify column w_warehouse_name set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='', 'max_value'='Therefore urg', 'data_size'='307') + """ + + sql """ + alter table catalog_sales modify column cs_ext_discount_amt set stats ('row_count'='1439980416', 'ndv'='1100115', 'num_nulls'='7201054', 'min_value'='0.00', 'max_value'='29982.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid_inc_ship_tax set stats ('row_count'='1439980416', 'ndv'='3312360', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='46593.36', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_promo_sk set stats ('row_count'='1439980416', 'ndv'='1489', 'num_nulls'='7202844', 'min_value'='1', 'max_value'='1500', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_call_center_id set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPBAAAAAA', 'data_size'='672') + """ + + sql """ + alter table call_center modify column cc_employees set stats ('row_count'='42', 'ndv'='30', 'num_nulls'='0', 'min_value'='69020', 'max_value'='6879074', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_suite_number set stats ('row_count'='42', 'ndv'='18', 'num_nulls'='0', 'min_value'='Suite 0', 
'max_value'='Suite W', 'data_size'='326') + """ + + sql """ + alter table catalog_returns modify column cr_item_sk set stats ('row_count'='143996756', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_reason_sk set stats ('row_count'='143996756', 'ndv'='65', 'num_nulls'='2881950', 'min_value'='1', 'max_value'='65', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_return_ship_cost set stats ('row_count'='143996756', 'ndv'='483467', 'num_nulls'='2883436', 'min_value'='0.00', 'max_value'='14273.28', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_ship_mode_sk set stats ('row_count'='143996756', 'ndv'='20', 'num_nulls'='2879879', 'min_value'='1', 'max_value'='20', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_store_credit set stats ('row_count'='143996756', 'ndv'='802237', 'num_nulls'='2880469', 'min_value'='0.00', 'max_value'='23215.15', 'data_size'='575987024') + """ + + sql """ + alter table customer_address modify column ca_city set stats ('row_count'='6000000', 'ndv'='977', 'num_nulls'='0', 'min_value'='', 'max_value'='Zion', 'data_size'='52096290') + """ + + sql """ + alter table customer_address modify column ca_state set stats ('row_count'='6000000', 'ndv'='52', 'num_nulls'='0', 'min_value'='', 'max_value'='WY', 'data_size'='11640128') + """ + + sql """ + alter table customer_address modify column ca_street_name set stats ('row_count'='6000000', 'ndv'='8173', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodland Woodland', 'data_size'='50697257') + """ + + sql """ + alter table customer_address modify column ca_street_type set stats ('row_count'='6000000', 'ndv'='21', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='24441630') + """ + + sql """ + alter table catalog_page modify column cp_catalog_number set 
stats ('row_count'='30000', 'ndv'='109', 'num_nulls'='297', 'min_value'='1', 'max_value'='109', 'data_size'='120000') + """ + + sql """ + alter table catalog_page modify column cp_catalog_page_number set stats ('row_count'='30000', 'ndv'='279', 'num_nulls'='294', 'min_value'='1', 'max_value'='277', 'data_size'='120000') + """ + + sql """ + alter table catalog_page modify column cp_catalog_page_sk set stats ('row_count'='30000', 'ndv'='30439', 'num_nulls'='0', 'min_value'='1', 'max_value'='30000', 'data_size'='240000') + """ + + sql """ + alter table catalog_page modify column cp_start_date_sk set stats ('row_count'='30000', 'ndv'='91', 'num_nulls'='286', 'min_value'='2450815', 'max_value'='2453005', 'data_size'='120000') + """ + + sql """ + alter table item modify column i_rec_start_date set stats ('row_count'='300000', 'ndv'='4', 'num_nulls'='784', 'min_value'='1997-10-27', 'max_value'='2001-10-27', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_units set stats ('row_count'='300000', 'ndv'='22', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='1253652') + """ + + sql """ + alter table web_returns modify column wr_refunded_hdemo_sk set stats ('row_count'='71997522', 'ndv'='7251', 'num_nulls'='3238545', 'min_value'='1', 'max_value'='7200', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_return_ship_cost set stats ('row_count'='71997522', 'ndv'='451263', 'num_nulls'='3239048', 'min_value'='0.00', 'max_value'='14352.10', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_returned_date_sk set stats ('row_count'='71997522', 'ndv'='2188', 'num_nulls'='3239259', 'min_value'='2450819', 'max_value'='2453002', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_returning_cdemo_sk set stats ('row_count'='71997522', 'ndv'='1916366', 'num_nulls'='3239192', 'min_value'='1', 'max_value'='1920800', 'data_size'='575980176') + 
""" + + sql """ + alter table web_site modify column web_suite_number set stats ('row_count'='54', 'ndv'='38', 'num_nulls'='0', 'min_value'='Suite 100', 'max_value'='Suite Y', 'data_size'='430') + """ + + sql """ + alter table promotion modify column p_start_date_sk set stats ('row_count'='1500', 'ndv'='685', 'num_nulls'='23', 'min_value'='2450096', 'max_value'='2450915', 'data_size'='12000') + """ + + sql """ + alter table web_sales modify column ws_coupon_amt set stats ('row_count'='720000376', 'ndv'='1505315', 'num_nulls'='179933', 'min_value'='0.00', 'max_value'='28824.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ext_wholesale_cost set stats ('row_count'='720000376', 'ndv'='393180', 'num_nulls'='180060', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_paid_inc_ship set stats ('row_count'='720000376', 'ndv'='2414838', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='44263.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ship_date_sk set stats ('row_count'='720000376', 'ndv'='1952', 'num_nulls'='180011', 'min_value'='2450817', 'max_value'='2452762', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_web_page_sk set stats ('row_count'='720000376', 'ndv'='2984', 'num_nulls'='179732', 'min_value'='1', 'max_value'='3000', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_country set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='12961') + """ + + sql """ + alter table store modify column s_store_name set stats ('row_count'='1002', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='3916') + """ + + sql """ + alter table time_dim modify column t_second set stats ('row_count'='86400', 'ndv'='60', 'num_nulls'='0', 'min_value'='0', 'max_value'='59', 
'data_size'='345600') + """ + + sql """ + alter table time_dim modify column t_sub_shift set stats ('row_count'='86400', 'ndv'='4', 'num_nulls'='0', 'min_value'='afternoon', 'max_value'='night', 'data_size'='597600') + """ + + sql """ + alter table web_page modify column wp_image_count set stats ('row_count'='3000', 'ndv'='7', 'num_nulls'='26', 'min_value'='1', 'max_value'='7', 'data_size'='12000') + """ + + sql """ + alter table web_page modify column wp_type set stats ('row_count'='3000', 'ndv'='8', 'num_nulls'='0', 'min_value'='', 'max_value'='welcome', 'data_size'='18867') + """ + + sql """ + alter table store_returns modify column sr_customer_sk set stats ('row_count'='287999764', 'ndv'='12157481', 'num_nulls'='10081624', 'min_value'='1', 'max_value'='12000000', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_hdemo_sk set stats ('row_count'='287999764', 'ndv'='7251', 'num_nulls'='10083275', 'min_value'='1', 'max_value'='7200', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_addr_sk set stats ('row_count'='2879987999', 'ndv'='6015811', 'num_nulls'='129589799', 'min_value'='1', 'max_value'='6000000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_item_sk set stats ('row_count'='2879987999', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_quantity set stats ('row_count'='2879987999', 'ndv'='100', 'num_nulls'='129584258', 'min_value'='1', 'max_value'='100', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_ticket_number set stats ('row_count'='2879987999', 'ndv'='238830448', 'num_nulls'='0', 'min_value'='1', 'max_value'='240000000', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_wholesale_cost set stats ('row_count'='2879987999', 'ndv'='9905', 
'num_nulls'='129590273', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='11519951996') + """ + + sql """ + alter table ship_mode modify column sm_type set stats ('row_count'='20', 'ndv'='6', 'num_nulls'='0', 'min_value'='EXPRESS', 'max_value'='TWO DAY', 'data_size'='150') + """ + + sql """ + alter table customer modify column c_current_addr_sk set stats ('row_count'='12000000', 'ndv'='5243359', 'num_nulls'='0', 'min_value'='3', 'max_value'='6000000', 'data_size'='96000000') + """ + + sql """ + alter table customer modify column c_last_name set stats ('row_count'='12000000', 'ndv'='4990', 'num_nulls'='0', 'min_value'='', 'max_value'='Zuniga', 'data_size'='70991730') + """ + + sql """ + alter table dbgen_version modify column dv_cmdline_args set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='-SCALE 1000 -PARALLEL 64 -CHILD 1 -TERMINATE N -DIR /mnt/datadisk0/tpcds1t/tpcds-data', 'max_value'='-SCALE 1000 -PARALLEL 64 -CHILD 1 -TERMINATE N -DIR /mnt/datadisk0/tpcds1t/tpcds-data', 'data_size'='86') + """ + + sql """ + alter table date_dim modify column d_current_quarter set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_date_sk set stats ('row_count'='73049', 'ndv'='73042', 'num_nulls'='0', 'min_value'='2415022', 'max_value'='2488070', 'data_size'='584392') + """ + + sql """ + alter table date_dim modify column d_holiday set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table warehouse modify column w_country set stats ('row_count'='20', 'ndv'='1', 'num_nulls'='0', 'min_value'='United States', 'max_value'='United States', 'data_size'='260') + """ + + sql """ + alter table warehouse modify column w_state set stats ('row_count'='20', 'ndv'='13', 'num_nulls'='0', 'min_value'='AL', 'max_value'='TN', 'data_size'='40') + """ + + sql """ + alter 
table catalog_sales modify column cs_bill_addr_sk set stats ('row_count'='1439980416', 'ndv'='6015811', 'num_nulls'='7199539', 'min_value'='1', 'max_value'='6000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_bill_customer_sk set stats ('row_count'='1439980416', 'ndv'='12157481', 'num_nulls'='7201919', 'min_value'='1', 'max_value'='12000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_net_paid set stats ('row_count'='1439980416', 'ndv'='1809875', 'num_nulls'='7197668', 'min_value'='0.00', 'max_value'='29943.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ship_addr_sk set stats ('row_count'='1439980416', 'ndv'='6015811', 'num_nulls'='7198232', 'min_value'='1', 'max_value'='6000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ship_mode_sk set stats ('row_count'='1439980416', 'ndv'='20', 'num_nulls'='7201083', 'min_value'='1', 'max_value'='20', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_sold_date_sk set stats ('row_count'='1439980416', 'ndv'='1835', 'num_nulls'='7203326', 'min_value'='2450815', 'max_value'='2452654', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_sold_time_sk set stats ('row_count'='1439980416', 'ndv'='87677', 'num_nulls'='7201329', 'min_value'='0', 'max_value'='86399', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_wholesale_cost set stats ('row_count'='1439980416', 'ndv'='9905', 'num_nulls'='7201098', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='5759921664') + """ + + sql """ + alter table call_center modify column cc_company_name set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='able', 'max_value'='pri', 'data_size'='160') + """ + + sql """ + alter table call_center modify column cc_market_manager set 
stats ('row_count'='42', 'ndv'='35', 'num_nulls'='0', 'min_value'='Cesar Allen', 'max_value'='William Larsen', 'data_size'='524') + """ + + sql """ + alter table call_center modify column cc_mkt_id set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_street_type set stats ('row_count'='42', 'ndv'='11', 'num_nulls'='0', 'min_value'='Avenue', 'max_value'='Way', 'data_size'='184') + """ + + sql """ + alter table catalog_returns modify column cr_return_tax set stats ('row_count'='143996756', 'ndv'='149828', 'num_nulls'='2881611', 'min_value'='0.00', 'max_value'='2511.58', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returning_cdemo_sk set stats ('row_count'='143996756', 'ndv'='1916366', 'num_nulls'='2880543', 'min_value'='1', 'max_value'='1920800', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_returning_hdemo_sk set stats ('row_count'='143996756', 'ndv'='7251', 'num_nulls'='2882692', 'min_value'='1', 'max_value'='7200', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_reversed_charge set stats ('row_count'='143996756', 'ndv'='802509', 'num_nulls'='2881215', 'min_value'='0.00', 'max_value'='24033.84', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_warehouse_sk set stats ('row_count'='143996756', 'ndv'='20', 'num_nulls'='2882192', 'min_value'='1', 'max_value'='20', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_demo_sk set stats ('row_count'='7200', 'ndv'='7251', 'num_nulls'='0', 'min_value'='1', 'max_value'='7200', 'data_size'='57600') + """ + + sql """ + alter table household_demographics modify column hd_vehicle_count set stats ('row_count'='7200', 'ndv'='6', 'num_nulls'='0', 'min_value'='-1', 'max_value'='4', 'data_size'='28800') 
+ """ + + sql """ + alter table customer_address modify column ca_zip set stats ('row_count'='6000000', 'ndv'='9253', 'num_nulls'='0', 'min_value'='', 'max_value'='99981', 'data_size'='29097610') + """ + + sql """ + alter table income_band modify column ib_income_band_sk set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='1', 'max_value'='20', 'data_size'='160') + """ + + sql """ + alter table catalog_page modify column cp_type set stats ('row_count'='30000', 'ndv'='4', 'num_nulls'='0', 'min_value'='', 'max_value'='quarterly', 'data_size'='227890') + """ + + sql """ + alter table item modify column i_brand set stats ('row_count'='300000', 'ndv'='714', 'num_nulls'='0', 'min_value'='', 'max_value'='univunivamalg #9', 'data_size'='4834917') + """ + + sql """ + alter table item modify column i_formulation set stats ('row_count'='300000', 'ndv'='224757', 'num_nulls'='0', 'min_value'='', 'max_value'='yellow98911509228741', 'data_size'='5984460') + """ + + sql """ + alter table item modify column i_item_desc set stats ('row_count'='300000', 'ndv'='217721', 'num_nulls'='0', 'min_value'='', 'max_value'='Youngsters used to save quite colour', 'data_size'='30093342') + """ + + sql """ + alter table web_returns modify column wr_fee set stats ('row_count'='71997522', 'ndv'='9958', 'num_nulls'='3238926', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_item_sk set stats ('row_count'='71997522', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_reason_sk set stats ('row_count'='71997522', 'ndv'='65', 'num_nulls'='3238897', 'min_value'='1', 'max_value'='65', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_refunded_customer_sk set stats ('row_count'='71997522', 'ndv'='12117831', 'num_nulls'='3242433', 'min_value'='1', 'max_value'='12000000', 
'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_city set stats ('row_count'='54', 'ndv'='31', 'num_nulls'='0', 'min_value'='', 'max_value'='Woodlawn', 'data_size'='491') + """ + + sql """ + alter table web_site modify column web_close_date_sk set stats ('row_count'='54', 'ndv'='18', 'num_nulls'='10', 'min_value'='2441265', 'max_value'='2446218', 'data_size'='432') + """ + + sql """ + alter table web_site modify column web_company_id set stats ('row_count'='54', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_company_name set stats ('row_count'='54', 'ndv'='7', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='203') + """ + + sql """ + alter table web_site modify column web_county set stats ('row_count'='54', 'ndv'='25', 'num_nulls'='0', 'min_value'='', 'max_value'='Williamson County', 'data_size'='762') + """ + + sql """ + alter table web_site modify column web_name set stats ('row_count'='54', 'ndv'='10', 'num_nulls'='0', 'min_value'='', 'max_value'='site_8', 'data_size'='312') + """ + + sql """ + alter table web_site modify column web_open_date_sk set stats ('row_count'='54', 'ndv'='27', 'num_nulls'='1', 'min_value'='2450373', 'max_value'='2450807', 'data_size'='432') + """ + + sql """ + alter table promotion modify column p_channel_dmail set stats ('row_count'='1500', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='1483') + """ + + sql """ + alter table promotion modify column p_channel_press set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1481') + """ + + sql """ + alter table promotion modify column p_channel_radio set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1479') + """ + + sql """ + alter table promotion modify column p_cost set stats ('row_count'='1500', 'ndv'='1', 
'num_nulls'='18', 'min_value'='1000.00', 'max_value'='1000.00', 'data_size'='12000') + """ + + sql """ + alter table web_sales modify column ws_ext_tax set stats ('row_count'='720000376', 'ndv'='211413', 'num_nulls'='179695', 'min_value'='0.00', 'max_value'='2682.90', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_item_sk set stats ('row_count'='720000376', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_net_paid set stats ('row_count'='720000376', 'ndv'='1749360', 'num_nulls'='179970', 'min_value'='0.00', 'max_value'='29810.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_paid_inc_ship_tax set stats ('row_count'='720000376', 'ndv'='3224829', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='46004.19', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_paid_inc_tax set stats ('row_count'='720000376', 'ndv'='2354996', 'num_nulls'='179972', 'min_value'='0.00', 'max_value'='32492.90', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_order_number set stats ('row_count'='720000376', 'ndv'='60401176', 'num_nulls'='0', 'min_value'='1', 'max_value'='60000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_quantity set stats ('row_count'='720000376', 'ndv'='100', 'num_nulls'='179781', 'min_value'='1', 'max_value'='100', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ship_cdemo_sk set stats ('row_count'='720000376', 'ndv'='1916366', 'num_nulls'='180290', 'min_value'='1', 'max_value'='1920800', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_sold_time_sk set stats ('row_count'='720000376', 'ndv'='87677', 'num_nulls'='179980', 'min_value'='0', 'max_value'='86399', 'data_size'='5760003008') + """ + + sql 
""" + alter table store modify column s_street_type set stats ('row_count'='1002', 'ndv'='21', 'num_nulls'='0', 'min_value'='', 'max_value'='Wy', 'data_size'='4189') + """ + + sql """ + alter table web_page modify column wp_autogen_flag set stats ('row_count'='3000', 'ndv'='3', 'num_nulls'='0', 'min_value'='', 'max_value'='Y', 'data_size'='2962') + """ + + sql """ + alter table web_page modify column wp_rec_start_date set stats ('row_count'='3000', 'ndv'='4', 'num_nulls'='29', 'min_value'='1997-09-03', 'max_value'='2001-09-03', 'data_size'='12000') + """ + + sql """ + alter table store_returns modify column sr_net_loss set stats ('row_count'='287999764', 'ndv'='714210', 'num_nulls'='10080716', 'min_value'='0.50', 'max_value'='10776.08', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_amt_inc_tax set stats ('row_count'='287999764', 'ndv'='1259368', 'num_nulls'='10076879', 'min_value'='0.00', 'max_value'='20454.63', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_quantity set stats ('row_count'='287999764', 'ndv'='100', 'num_nulls'='10082815', 'min_value'='1', 'max_value'='100', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_return_ship_cost set stats ('row_count'='287999764', 'ndv'='355844', 'num_nulls'='10081927', 'min_value'='0.00', 'max_value'='9767.34', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_reversed_charge set stats ('row_count'='287999764', 'ndv'='700618', 'num_nulls'='10085976', 'min_value'='0.00', 'max_value'='17339.42', 'data_size'='1151999056') + """ + + sql """ + alter table store_sales modify column ss_net_paid_inc_tax set stats ('row_count'='2879987999', 'ndv'='1681767', 'num_nulls'='129609050', 'min_value'='0.00', 'max_value'='21769.48', 'data_size'='11519951996') + """ + + sql """ + alter table customer modify column c_birth_day set stats ('row_count'='12000000', 
'ndv'='31', 'num_nulls'='420361', 'min_value'='1', 'max_value'='31', 'data_size'='48000000') + """ + + sql """ + alter table customer_demographics modify column cd_credit_rating set stats ('row_count'='1920800', 'ndv'='4', 'num_nulls'='0', 'min_value'='Good', 'max_value'='Unknown', 'data_size'='13445600') + """ + + sql """ + alter table customer_demographics modify column cd_demo_sk set stats ('row_count'='1920800', 'ndv'='1916366', 'num_nulls'='0', 'min_value'='1', 'max_value'='1920800', 'data_size'='15366400') + """ + + sql """ + alter table customer_demographics modify column cd_dep_count set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='0', 'max_value'='6', 'data_size'='7683200') + """ + + sql """ + alter table customer_demographics modify column cd_education_status set stats ('row_count'='1920800', 'ndv'='7', 'num_nulls'='0', 'min_value'='2 yr Degree', 'max_value'='Unknown', 'data_size'='18384800') + """ + + sql """ + alter table customer_demographics modify column cd_gender set stats ('row_count'='1920800', 'ndv'='2', 'num_nulls'='0', 'min_value'='F', 'max_value'='M', 'data_size'='1920800') + """ + + sql """ + alter table customer_demographics modify column cd_marital_status set stats ('row_count'='1920800', 'ndv'='5', 'num_nulls'='0', 'min_value'='D', 'max_value'='W', 'data_size'='1920800') + """ + + sql """ + alter table date_dim modify column d_date_id set stats ('row_count'='73049', 'ndv'='72907', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAAFCAA', 'max_value'='AAAAAAAAPPPPECAA', 'data_size'='1168784') + """ + + sql """ + alter table date_dim modify column d_fy_week_seq set stats ('row_count'='73049', 'ndv'='10448', 'num_nulls'='0', 'min_value'='1', 'max_value'='10436', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_year set stats ('row_count'='73049', 'ndv'='202', 'num_nulls'='0', 'min_value'='1900', 'max_value'='2100', 'data_size'='292196') + """ + + sql """ + alter table warehouse modify column 
w_warehouse_id set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AAAAAAAAABAAAAAA', 'max_value'='AAAAAAAAPAAAAAAA', 'data_size'='320') + """ + + sql """ + alter table catalog_sales modify column cs_ext_list_price set stats ('row_count'='1439980416', 'ndv'='1160303', 'num_nulls'='7199542', 'min_value'='1.00', 'max_value'='30000.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ext_tax set stats ('row_count'='1439980416', 'ndv'='215267', 'num_nulls'='7200412', 'min_value'='0.00', 'max_value'='2673.27', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_quantity set stats ('row_count'='1439980416', 'ndv'='100', 'num_nulls'='7202885', 'min_value'='1', 'max_value'='100', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_ship_cdemo_sk set stats ('row_count'='1439980416', 'ndv'='1916366', 'num_nulls'='7200151', 'min_value'='1', 'max_value'='1920800', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ship_customer_sk set stats ('row_count'='1439980416', 'ndv'='12157481', 'num_nulls'='7201507', 'min_value'='1', 'max_value'='12000000', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_company set stats ('row_count'='42', 'ndv'='6', 'num_nulls'='0', 'min_value'='1', 'max_value'='6', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_mkt_desc set stats ('row_count'='42', 'ndv'='33', 'num_nulls'='0', 'min_value'='Arms increase controversial, present so', 'max_value'='Young tests could buy comfortable, local users; o', 'data_size'='2419') + """ + + sql """ + alter table call_center modify column cc_open_date_sk set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='2450794', 'max_value'='2451146', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_rec_end_date set stats ('row_count'='42', 
'ndv'='3', 'num_nulls'='21', 'min_value'='2000-01-01', 'max_value'='2001-12-31', 'data_size'='168') + """ + + sql """ + alter table catalog_returns modify column cr_order_number set stats ('row_count'='143996756', 'ndv'='93476424', 'num_nulls'='0', 'min_value'='2', 'max_value'='160000000', 'data_size'='1151974048') + """ + + sql """ + alter table catalog_returns modify column cr_return_amount set stats ('row_count'='143996756', 'ndv'='882831', 'num_nulls'='2880424', 'min_value'='0.00', 'max_value'='28805.04', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returned_date_sk set stats ('row_count'='143996756', 'ndv'='2108', 'num_nulls'='0', 'min_value'='2450821', 'max_value'='2452924', 'data_size'='1151974048') + """ + + sql """ + alter table income_band modify column ib_upper_bound set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='10000', 'max_value'='200000', 'data_size'='80') + """ + + sql """ + alter table catalog_page modify column cp_department set stats ('row_count'='30000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='DEPARTMENT', 'data_size'='297110') + """ + + sql """ + alter table catalog_page modify column cp_end_date_sk set stats ('row_count'='30000', 'ndv'='97', 'num_nulls'='302', 'min_value'='2450844', 'max_value'='2453186', 'data_size'='120000') + """ + + sql """ + alter table item modify column i_brand_id set stats ('row_count'='300000', 'ndv'='951', 'num_nulls'='763', 'min_value'='1001001', 'max_value'='10016017', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_category set stats ('row_count'='300000', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='Women', 'data_size'='1766742') + """ + + sql """ + alter table item modify column i_class_id set stats ('row_count'='300000', 'ndv'='16', 'num_nulls'='722', 'min_value'='1', 'max_value'='16', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_item_sk set stats 
('row_count'='300000', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='2400000') + """ + + sql """ + alter table item modify column i_manufact_id set stats ('row_count'='300000', 'ndv'='1005', 'num_nulls'='761', 'min_value'='1', 'max_value'='1000', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_wholesale_cost set stats ('row_count'='300000', 'ndv'='7243', 'num_nulls'='740', 'min_value'='0.02', 'max_value'='89.49', 'data_size'='1200000') + """ + + sql """ + alter table web_returns modify column wr_refunded_cdemo_sk set stats ('row_count'='71997522', 'ndv'='1916366', 'num_nulls'='3240352', 'min_value'='1', 'max_value'='1920800', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_return_tax set stats ('row_count'='71997522', 'ndv'='137392', 'num_nulls'='3237729', 'min_value'='0.00', 'max_value'='2551.16', 'data_size'='287990088') + """ + + sql """ + alter table web_returns modify column wr_returning_hdemo_sk set stats ('row_count'='71997522', 'ndv'='7251', 'num_nulls'='3238239', 'min_value'='1', 'max_value'='7200', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_web_page_sk set stats ('row_count'='71997522', 'ndv'='2984', 'num_nulls'='3240387', 'min_value'='1', 'max_value'='3000', 'data_size'='575980176') + """ + + sql """ + alter table web_site modify column web_class set stats ('row_count'='54', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='371') + """ + + sql """ + alter table web_site modify column web_zip set stats ('row_count'='54', 'ndv'='32', 'num_nulls'='0', 'min_value'='14593', 'max_value'='99431', 'data_size'='270') + """ + + sql """ + alter table promotion modify column p_channel_email set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1480') + """ + + sql """ + alter table promotion modify column p_item_sk set stats 
('row_count'='1500', 'ndv'='1467', 'num_nulls'='19', 'min_value'='184', 'max_value'='299990', 'data_size'='12000') + """ + + sql """ + alter table promotion modify column p_promo_name set stats ('row_count'='1500', 'ndv'='11', 'num_nulls'='0', 'min_value'='', 'max_value'='pri', 'data_size'='5896') + """ + + sql """ + alter table web_sales modify column ws_ext_discount_amt set stats ('row_count'='720000376', 'ndv'='1093513', 'num_nulls'='179851', 'min_value'='0.00', 'max_value'='29982.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_ext_list_price set stats ('row_count'='720000376', 'ndv'='1160303', 'num_nulls'='179866', 'min_value'='1.00', 'max_value'='30000.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_wholesale_cost set stats ('row_count'='720000376', 'ndv'='9905', 'num_nulls'='179834', 'min_value'='1.00', 'max_value'='100.00', 'data_size'='2880001504') + """ + + sql """ + alter table store modify column s_market_manager set stats ('row_count'='1002', 'ndv'='732', 'num_nulls'='0', 'min_value'='', 'max_value'='Zane Perez', 'data_size'='12823') + """ + + sql """ + alter table store modify column s_number_employees set stats ('row_count'='1002', 'ndv'='101', 'num_nulls'='8', 'min_value'='200', 'max_value'='300', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_rec_end_date set stats ('row_count'='1002', 'ndv'='3', 'num_nulls'='501', 'min_value'='1999-03-13', 'max_value'='2001-03-12', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_rec_start_date set stats ('row_count'='1002', 'ndv'='4', 'num_nulls'='7', 'min_value'='1997-03-13', 'max_value'='2001-03-13', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_suite_number set stats ('row_count'='1002', 'ndv'='76', 'num_nulls'='0', 'min_value'='', 'max_value'='Suite Y', 'data_size'='7866') + """ + + sql """ + alter table time_dim modify column t_hour set stats 
('row_count'='86400', 'ndv'='24', 'num_nulls'='0', 'min_value'='0', 'max_value'='23', 'data_size'='345600') + """ + + sql """ + alter table time_dim modify column t_shift set stats ('row_count'='86400', 'ndv'='3', 'num_nulls'='0', 'min_value'='first', 'max_value'='third', 'data_size'='460800') + """ + + sql """ + alter table web_page modify column wp_link_count set stats ('row_count'='3000', 'ndv'='24', 'num_nulls'='27', 'min_value'='2', 'max_value'='25', 'data_size'='12000') + """ + + sql """ + alter table web_page modify column wp_rec_end_date set stats ('row_count'='3000', 'ndv'='3', 'num_nulls'='1500', 'min_value'='1999-09-03', 'max_value'='2001-09-02', 'data_size'='12000') + """ + + sql """ + alter table store_returns modify column sr_cdemo_sk set stats ('row_count'='287999764', 'ndv'='1916366', 'num_nulls'='10076902', 'min_value'='1', 'max_value'='1920800', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_item_sk set stats ('row_count'='287999764', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_cdemo_sk set stats ('row_count'='2879987999', 'ndv'='1916366', 'num_nulls'='129602155', 'min_value'='1', 'max_value'='1920800', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_ext_discount_amt set stats ('row_count'='2879987999', 'ndv'='1161208', 'num_nulls'='129609101', 'min_value'='0.00', 'max_value'='19778.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_ext_wholesale_cost set stats ('row_count'='2879987999', 'ndv'='393180', 'num_nulls'='129595018', 'min_value'='1.00', 'max_value'='10000.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_list_price set stats ('row_count'='2879987999', 'ndv'='19640', 'num_nulls'='129597020', 'min_value'='1.00', 'max_value'='200.00', 'data_size'='11519951996') + 
""" + + sql """ + alter table store_sales modify column ss_net_paid set stats ('row_count'='2879987999', 'ndv'='1288646', 'num_nulls'='129599407', 'min_value'='0.00', 'max_value'='19972.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_sold_date_sk set stats ('row_count'='2879987999', 'ndv'='1820', 'num_nulls'='129600843', 'min_value'='2450816', 'max_value'='2452642', 'data_size'='23039903992') + """ + + sql """ + alter table store_sales modify column ss_sold_time_sk set stats ('row_count'='2879987999', 'ndv'='47252', 'num_nulls'='129593012', 'min_value'='28800', 'max_value'='75599', 'data_size'='23039903992') + """ + + sql """ + alter table ship_mode modify column sm_carrier set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='AIRBORNE', 'max_value'='ZOUROS', 'data_size'='133') + """ + + sql """ + alter table customer modify column c_birth_year set stats ('row_count'='12000000', 'ndv'='69', 'num_nulls'='419584', 'min_value'='1924', 'max_value'='1992', 'data_size'='48000000') + """ + + sql """ + alter table customer modify column c_login set stats ('row_count'='12000000', 'ndv'='1', 'num_nulls'='0', 'min_value'='', 'max_value'='', 'data_size'='0') + """ + + sql """ + alter table customer modify column c_salutation set stats ('row_count'='12000000', 'ndv'='7', 'num_nulls'='0', 'min_value'='', 'max_value'='Sir', 'data_size'='37544445') + """ + + sql """ + alter table reason modify column r_reason_desc set stats ('row_count'='65', 'ndv'='64', 'num_nulls'='0', 'min_value'='Did not fit', 'max_value'='unauthoized purchase', 'data_size'='848') + """ + + sql """ + alter table date_dim modify column d_current_year set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table date_dim modify column d_dom set stats ('row_count'='73049', 'ndv'='31', 'num_nulls'='0', 'min_value'='1', 'max_value'='31', 'data_size'='292196') + """ + + 
sql """ + alter table date_dim modify column d_same_day_lq set stats ('row_count'='73049', 'ndv'='72231', 'num_nulls'='0', 'min_value'='2414930', 'max_value'='2487978', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_week_seq set stats ('row_count'='73049', 'ndv'='10448', 'num_nulls'='0', 'min_value'='1', 'max_value'='10436', 'data_size'='292196') + """ + + sql """ + alter table date_dim modify column d_weekend set stats ('row_count'='73049', 'ndv'='2', 'num_nulls'='0', 'min_value'='N', 'max_value'='Y', 'data_size'='73049') + """ + + sql """ + alter table warehouse modify column w_zip set stats ('row_count'='20', 'ndv'='18', 'num_nulls'='0', 'min_value'='19231', 'max_value'='89275', 'data_size'='100') + """ + + sql """ + alter table catalog_sales modify column cs_catalog_page_sk set stats ('row_count'='1439980416', 'ndv'='17005', 'num_nulls'='7199032', 'min_value'='1', 'max_value'='25207', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_coupon_amt set stats ('row_count'='1439980416', 'ndv'='1578778', 'num_nulls'='7198116', 'min_value'='0.00', 'max_value'='28730.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_list_price set stats ('row_count'='1439980416', 'ndv'='29396', 'num_nulls'='7201549', 'min_value'='1.00', 'max_value'='300.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_net_profit set stats ('row_count'='1439980416', 'ndv'='2058398', 'num_nulls'='0', 'min_value'='-10000.00', 'max_value'='19962.00', 'data_size'='5759921664') + """ + + sql """ + alter table catalog_sales modify column cs_order_number set stats ('row_count'='1439980416', 'ndv'='159051824', 'num_nulls'='0', 'min_value'='1', 'max_value'='160000000', 'data_size'='11519843328') + """ + + sql """ + alter table catalog_sales modify column cs_ship_hdemo_sk set stats ('row_count'='1439980416', 'ndv'='7251', 'num_nulls'='7201542', 
'min_value'='1', 'max_value'='7200', 'data_size'='11519843328') + """ + + sql """ + alter table call_center modify column cc_call_center_sk set stats ('row_count'='42', 'ndv'='42', 'num_nulls'='0', 'min_value'='1', 'max_value'='42', 'data_size'='336') + """ + + sql """ + alter table call_center modify column cc_city set stats ('row_count'='42', 'ndv'='17', 'num_nulls'='0', 'min_value'='Antioch', 'max_value'='Spring Hill', 'data_size'='386') + """ + + sql """ + alter table call_center modify column cc_closed_date_sk set stats ('row_count'='42', 'ndv'='0', 'num_nulls'='42', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_gmt_offset set stats ('row_count'='42', 'ndv'='4', 'num_nulls'='0', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='168') + """ + + sql """ + alter table call_center modify column cc_hours set stats ('row_count'='42', 'ndv'='3', 'num_nulls'='0', 'min_value'='8AM-12AM', 'max_value'='8AM-8AM', 'data_size'='300') + """ + + sql """ + alter table call_center modify column cc_street_number set stats ('row_count'='42', 'ndv'='21', 'num_nulls'='0', 'min_value'='38', 'max_value'='999', 'data_size'='120') + """ + + sql """ + alter table call_center modify column cc_tax_percentage set stats ('row_count'='42', 'ndv'='12', 'num_nulls'='0', 'min_value'='0.00', 'max_value'='0.12', 'data_size'='168') + """ + + sql """ + alter table inventory modify column inv_date_sk set stats ('row_count'='783000000', 'ndv'='261', 'num_nulls'='0', 'min_value'='2450815', 'max_value'='2452635', 'data_size'='6264000000') + """ + + sql """ + alter table inventory modify column inv_item_sk set stats ('row_count'='783000000', 'ndv'='295433', 'num_nulls'='0', 'min_value'='1', 'max_value'='300000', 'data_size'='6264000000') + """ + + sql """ + alter table catalog_returns modify column cr_fee set stats ('row_count'='143996756', 'ndv'='9958', 'num_nulls'='2882168', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='575987024') + """ + + sql """ + 
alter table catalog_returns modify column cr_return_quantity set stats ('row_count'='143996756', 'ndv'='100', 'num_nulls'='2878774', 'min_value'='1', 'max_value'='100', 'data_size'='575987024') + """ + + sql """ + alter table catalog_returns modify column cr_returned_time_sk set stats ('row_count'='143996756', 'ndv'='87677', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='1151974048') + """ + + sql """ + alter table household_demographics modify column hd_dep_count set stats ('row_count'='7200', 'ndv'='10', 'num_nulls'='0', 'min_value'='0', 'max_value'='9', 'data_size'='28800') + """ + + sql """ + alter table customer_address modify column ca_county set stats ('row_count'='6000000', 'ndv'='1825', 'num_nulls'='0', 'min_value'='', 'max_value'='Ziebach County', 'data_size'='81254984') + """ + + sql """ + alter table income_band modify column ib_lower_bound set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='0', 'max_value'='190001', 'data_size'='80') + """ + + sql """ + alter table item modify column i_category_id set stats ('row_count'='300000', 'ndv'='10', 'num_nulls'='766', 'min_value'='1', 'max_value'='10', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_class set stats ('row_count'='300000', 'ndv'='100', 'num_nulls'='0', 'min_value'='', 'max_value'='womens watch', 'data_size'='2331199') + """ + + sql """ + alter table item modify column i_container set stats ('row_count'='300000', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='2094652') + """ + + sql """ + alter table item modify column i_current_price set stats ('row_count'='300000', 'ndv'='9685', 'num_nulls'='775', 'min_value'='0.09', 'max_value'='99.99', 'data_size'='1200000') + """ + + sql """ + alter table item modify column i_manager_id set stats ('row_count'='300000', 'ndv'='100', 'num_nulls'='744', 'min_value'='1', 'max_value'='100', 'data_size'='1200000') + """ + + sql """ + alter table item modify column 
i_size set stats ('row_count'='300000', 'ndv'='8', 'num_nulls'='0', 'min_value'='', 'max_value'='small', 'data_size'='1296134') + """ + + sql """ + alter table web_returns modify column wr_order_number set stats ('row_count'='71997522', 'ndv'='42383708', 'num_nulls'='0', 'min_value'='1', 'max_value'='60000000', 'data_size'='575980176') + """ + + sql """ + alter table web_returns modify column wr_refunded_cash set stats ('row_count'='71997522', 'ndv'='955369', 'num_nulls'='3240493', 'min_value'='0.00', 'max_value'='26992.92', 'data_size'='287990088') + """ + + sql """ + alter table web_site modify column web_country set stats ('row_count'='54', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='United States', 'data_size'='689') + """ + + sql """ + alter table web_site modify column web_gmt_offset set stats ('row_count'='54', 'ndv'='4', 'num_nulls'='1', 'min_value'='-8.00', 'max_value'='-5.00', 'data_size'='216') + """ + + sql """ + alter table web_site modify column web_market_manager set stats ('row_count'='54', 'ndv'='46', 'num_nulls'='0', 'min_value'='', 'max_value'='Zachery Oneil', 'data_size'='691') + """ + + sql """ + alter table web_site modify column web_site_sk set stats ('row_count'='54', 'ndv'='54', 'num_nulls'='0', 'min_value'='1', 'max_value'='54', 'data_size'='432') + """ + + sql """ + alter table web_site modify column web_street_name set stats ('row_count'='54', 'ndv'='53', 'num_nulls'='0', 'min_value'='', 'max_value'='Wilson Ridge', 'data_size'='471') + """ + + sql """ + alter table web_site modify column web_tax_percentage set stats ('row_count'='54', 'ndv'='13', 'num_nulls'='1', 'min_value'='0.00', 'max_value'='0.12', 'data_size'='216') + """ + + sql """ + alter table promotion modify column p_channel_tv set stats ('row_count'='1500', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='N', 'data_size'='1481') + """ + + sql """ + alter table promotion modify column p_response_targe set stats ('row_count'='1500', 'ndv'='1', 
'num_nulls'='27', 'min_value'='1', 'max_value'='1', 'data_size'='6000') + """ + + sql """ + alter table web_sales modify column ws_bill_addr_sk set stats ('row_count'='720000376', 'ndv'='6015742', 'num_nulls'='179648', 'min_value'='1', 'max_value'='6000000', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ext_sales_price set stats ('row_count'='720000376', 'ndv'='1091003', 'num_nulls'='180023', 'min_value'='0.00', 'max_value'='29810.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_net_profit set stats ('row_count'='720000376', 'ndv'='2014057', 'num_nulls'='0', 'min_value'='-10000.00', 'max_value'='19840.00', 'data_size'='2880001504') + """ + + sql """ + alter table web_sales modify column ws_promo_sk set stats ('row_count'='720000376', 'ndv'='1489', 'num_nulls'='180016', 'min_value'='1', 'max_value'='1500', 'data_size'='5760003008') + """ + + sql """ + alter table web_sales modify column ws_ship_customer_sk set stats ('row_count'='720000376', 'ndv'='12074547', 'num_nulls'='179966', 'min_value'='1', 'max_value'='12000000', 'data_size'='5760003008') + """ + + sql """ + alter table store modify column s_division_name set stats ('row_count'='1002', 'ndv'='2', 'num_nulls'='0', 'min_value'='', 'max_value'='Unknown', 'data_size'='6965') + """ + + sql """ + alter table store modify column s_floor_space set stats ('row_count'='1002', 'ndv'='752', 'num_nulls'='6', 'min_value'='5002549', 'max_value'='9997773', 'data_size'='4008') + """ + + sql """ + alter table store modify column s_tax_percentage set stats ('row_count'='1002', 'ndv'='12', 'num_nulls'='8', 'min_value'='0.00', 'max_value'='0.11', 'data_size'='4008') + """ + + sql """ + alter table time_dim modify column t_time_id set stats ('row_count'='86400', 'ndv'='85663', 'num_nulls'='0', 'min_value'='AAAAAAAAAAAABAAA', 'max_value'='AAAAAAAAPPPPAAAA', 'data_size'='1382400') + """ + + sql """ + alter table time_dim modify column t_time_sk set 
stats ('row_count'='86400', 'ndv'='87677', 'num_nulls'='0', 'min_value'='0', 'max_value'='86399', 'data_size'='691200') + """ + + sql """ + alter table store_returns modify column sr_fee set stats ('row_count'='287999764', 'ndv'='9958', 'num_nulls'='10081860', 'min_value'='0.50', 'max_value'='100.00', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_reason_sk set stats ('row_count'='287999764', 'ndv'='65', 'num_nulls'='10087936', 'min_value'='1', 'max_value'='65', 'data_size'='2303998112') + """ + + sql """ + alter table store_returns modify column sr_store_credit set stats ('row_count'='287999764', 'ndv'='698161', 'num_nulls'='10077188', 'min_value'='0.00', 'max_value'='17792.48', 'data_size'='1151999056') + """ + + sql """ + alter table store_returns modify column sr_ticket_number set stats ('row_count'='287999764', 'ndv'='168770768', 'num_nulls'='0', 'min_value'='1', 'max_value'='240000000', 'data_size'='2303998112') + """ + + sql """ + alter table store_sales modify column ss_ext_list_price set stats ('row_count'='2879987999', 'ndv'='770971', 'num_nulls'='129593800', 'min_value'='1.00', 'max_value'='20000.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_ext_sales_price set stats ('row_count'='2879987999', 'ndv'='754248', 'num_nulls'='129589177', 'min_value'='0.00', 'max_value'='19972.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_net_profit set stats ('row_count'='2879987999', 'ndv'='1497362', 'num_nulls'='129572933', 'min_value'='-10000.00', 'max_value'='9986.00', 'data_size'='11519951996') + """ + + sql """ + alter table store_sales modify column ss_promo_sk set stats ('row_count'='2879987999', 'ndv'='1489', 'num_nulls'='129597096', 'min_value'='1', 'max_value'='1500', 'data_size'='23039903992') + """ + + sql """ + alter table ship_mode modify column sm_code set stats ('row_count'='20', 'ndv'='4', 'num_nulls'='0', 
'min_value'='AIR', 'max_value'='SURFACE', 'data_size'='87') + """ + + sql """ + alter table ship_mode modify column sm_contract set stats ('row_count'='20', 'ndv'='20', 'num_nulls'='0', 'min_value'='2mM8l', 'max_value'='yVfotg7Tio3MVhBg6Bkn', 'data_size'='252') + """ + + sql """ + alter table customer modify column c_current_hdemo_sk set stats ('row_count'='12000000', 'ndv'='7251', 'num_nulls'='418736', 'min_value'='1', 'max_value'='7200', 'data_size'='96000000') + """ + + sql """ + alter table dbgen_version modify column dv_create_date set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='2023-07-06', 'max_value'='2023-07-06', 'data_size'='4') + """ + + sql """ + alter table dbgen_version modify column dv_create_time set stats ('row_count'='1', 'ndv'='1', 'num_nulls'='0', 'min_value'='2017-05-13 00:00:00', 'max_value'='2017-05-13 00:00:00', 'data_size'='8') + """ +} From f23ee82a42ec84b4542561f212883a29e4600152 Mon Sep 17 00:00:00 2001 From: englefly Date: Wed, 7 Jan 2026 13:08:56 +0800 Subject: [PATCH 07/21] =?UTF-8?q?1.=20remove=20finalGroupKeys,=202.=20proj?= =?UTF-8?q?ect=20=E4=B8=8B=E6=8E=A8=E5=90=8E=E6=94=B9=E5=86=99projects?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../eageraggregation/EagerAggRewriter.java | 30 +++++++++---------- .../eageraggregation/PushDownAggContext.java | 11 ------- 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java index db3c85e030e403..9e2fd245035ddc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -145,13 +145,11 @@ public Plan visitLogicalJoin(LogicalJoin join, P if 
(toLeft) { Plan newLeft = join.left().accept(this, childContext); if (newLeft != join.left()) { - context.getFinalGroupKeys().addAll(childContext.getFinalGroupKeys()); return join.withChildren(newLeft, join.right()); } } else { Plan newRight = join.right().accept(this, childContext); if (newRight != join.right()) { - context.getFinalGroupKeys().addAll(childContext.getFinalGroupKeys()); return join.withChildren(join.left(), newRight); } } @@ -245,7 +243,6 @@ public Plan visitLogicalProject(LogicalProject project, PushDown PushDownAggContext newContext = createContextFromProject(project, context); Plan newChild = project.child().accept(this, newContext); if (newChild != project.child()) { - context.getFinalGroupKeys().addAll(newContext.getFinalGroupKeys()); /* * agg[sum(a), groupBy(b)] * -> proj(a, b1+b2 as b) @@ -254,29 +251,33 @@ public Plan visitLogicalProject(LogicalProject project, PushDown * -> any(d, ...) * => * agg[sum(x), groupBy(b)] - * -> proj(x, b) + * -> proj(x, b1+b2 as b) * -> join(c=d) - * ->agg[sum(a) as x, groupBy(b, c)] - * ->proj(a, b1+b2 as b, c, ...) + * ->agg[sum(a) as x, groupBy(b1, b2, c)] + * ->proj(a, b1, b2, c, ...) * -> any(a, b1, b2, c) * -> any(d, ...) 
*/ Set aggFuncInputSlots = context.getAggFunctionsInputSlots(); List newProjections = new ArrayList<>(); for (NamedExpression ne : project.getProjects()) { - if (aggFuncInputSlots.contains(ne.toSlot())) { - // ne (a) is replaced by alias slot (x) - continue; - } else if (context.getFinalGroupKeys().contains(ne.toSlot())) { - newProjections.add(ne.toSlot()); - } else { + if (newChild.getOutputSet().containsAll(ne.getInputSlots())) { newProjections.add(ne); + } else { + if (!(ne instanceof SlotReference && aggFuncInputSlots.contains((SlotReference) ne))) { + if (SessionVariable.isFeDebug()) { + throw new RuntimeException("push down Agg failed: " + ne + " is not in project \n" + + project.treeString()); + } else { + return project; + } + } } } for (Alias alias : context.getAliasMap().values()) { newProjections.add(alias.toSlot()); } - for (SlotReference key : context.getFinalGroupKeys()) { + for (SlotReference key : context.getGroupKeys()) { if (!newProjections.contains(key)) { newProjections.add(key); } @@ -308,9 +309,6 @@ private Plan genAggregate(Plan child, PushDownAggContext context) { List aggOutputExpressions = new ArrayList<>(); aggOutputExpressions.addAll(context.getAliasMap().values()); aggOutputExpressions.addAll(context.getGroupKeys()); - for (NamedExpression key : context.getGroupKeys()) { - context.addFinalGroupKey((SlotReference) key.toSlot()); - } LogicalAggregate genAgg = new LogicalAggregate(context.getGroupKeys(), aggOutputExpressions, child); NormalizeAggregate normalizeAggregate = new NormalizeAggregate(); return normalizeAggregate.normalizeAgg(genAgg, Optional.empty(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java index f61d06e6519374..8ee05609733d3b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java @@ -42,9 +42,6 @@ public class PushDownAggContext { private final Map aliasMap; private final Set aggFunctionsInputSlots; - // the group keys that eventually used to generate aggregation node - private final LinkedHashSet finalGroupKeys = new LinkedHashSet<>(); - // cascadesContext is used for normalizeAgg private final CascadesContext cascadesContext; @@ -111,14 +108,6 @@ public Set getAggFunctionsInputSlots() { return aggFunctionsInputSlots; } - public LinkedHashSet getFinalGroupKeys() { - return finalGroupKeys; - } - - public void addFinalGroupKey(SlotReference key) { - this.finalGroupKeys.add(key); - } - public CascadesContext getCascadesContext() { return cascadesContext; } From 3c9685abf7353375abd7be57d788b3525db73eaa Mon Sep 17 00:00:00 2001 From: englefly Date: Wed, 7 Jan 2026 13:24:11 +0800 Subject: [PATCH 08/21] throw exception for eager agg when FeDebug --- .../rewrite/eageraggregation/PushDownAggregation.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java index f38b634ff40ec4..7d5ab6e4c17b48 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -137,7 +137,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte PushDownAggContext pushDownContext = new PushDownAggContext(new ArrayList<>(aggFunctions), groupKeys, context.getCascadesContext()); - //try { + try { Plan child = agg.child().accept(writer, pushDownContext); if (child != agg.child()) { // agg has been pushed down, rewrite agg output expressions @@ -176,9 
+176,12 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte AdjustNullable adjustNullable = new AdjustNullable(false, false); return adjustNullable.rewriteRoot(normalized, null); } - //} catch (RuntimeException e) { - // LOG.info("PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString()); - //} + } catch (RuntimeException e) { + LOG.info("PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString()); + if (SessionVariable.isFeDebug()) { + throw e; + } + } return agg; } From fd30767a628bc841e374bc4bcb3310a0206f9d6a Mon Sep 17 00:00:00 2001 From: englefly Date: Wed, 7 Jan 2026 21:08:14 +0800 Subject: [PATCH 09/21] derive deep false --- .../rules/rewrite/eageraggregation/EagerAggRewriter.java | 2 +- .../eageraggregation/PushdownSumIfAggregation.java | 1 + .../rules/rewrite/eageraggregation/SumAggContext.java | 8 -------- .../rules/rewrite/eageraggregation/SumAggWriter.java | 2 +- 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java index 9e2fd245035ddc..28523d6d98e5d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -66,7 +66,7 @@ public class EagerAggRewriter extends DefaultPlanRewriter { private static final double LOWER_AGGREGATE_EFFECT_COEFFICIENT = 10000; private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000; private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100; - private final StatsDerive derive = new StatsDerive(true); + private final StatsDerive derive = new StatsDerive(false); @Override public Plan visitLogicalJoin(LogicalJoin join, PushDownAggContext context) { diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java index 4432d4ead75132..57465c5f982e0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java @@ -41,6 +41,7 @@ /** * sum(if t1.a then t2.b) + * tpcds 2 and 59 query can be rewritten */ public class PushdownSumIfAggregation extends DefaultPlanRewriter implements CustomRewriter { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java index 7b3e7ee948276c..b868a2177e4bc7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java @@ -45,12 +45,4 @@ public SumAggContext(List aliasToBePushDown, this.ifThenSlots = ImmutableList.copyOf(distinct); this.groupKeys = ImmutableList.copyOf(groupKeys); } - - public SumAggContext withIfThenSlots(List ifThenSlots) { - return new SumAggContext(this.aliasToBePushDown, - this.ifConditions, - ifThenSlots, - this.groupKeys); - } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java index 5b87a482279eef..fa2c472bbc4183 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java @@ -56,7 +56,7 @@ public 
class SumAggWriter extends DefaultPlanRewriter { private static final double LOWER_AGGREGATE_EFFECT_COEFFICIENT = 10000; private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000; private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100; - private final StatsDerive derive = new StatsDerive(true); + private final StatsDerive derive = new StatsDerive(false); @Override public Plan visit(Plan plan, SumAggContext context) { From 937e4d0faf19b858ba2c8dfee632512ccc35d6bd Mon Sep 17 00:00:00 2001 From: englefly Date: Thu, 8 Jan 2026 17:36:40 +0800 Subject: [PATCH 10/21] =?UTF-8?q?=E6=A3=80=E6=9F=A5context=E7=9A=84?= =?UTF-8?q?=E5=AD=97=E6=AE=B5=20=E6=98=AFproject=E7=9A=84=E8=BE=93?= =?UTF-8?q?=E5=87=BA.=E6=8B=92=E7=BB=9D=20sum(A)=20=E4=B8=8B=E6=8E=A8=20pr?= =?UTF-8?q?oj(x,=20x+y=20as=20A)=20=E4=B8=94x=20=E4=B8=8D=E6=98=AFgroup=20?= =?UTF-8?q?key?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../eageraggregation/EagerAggRewriter.java | 52 +++++++++++++------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java index 28523d6d98e5d6..0cd3785304b958 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -210,6 +210,36 @@ private PushDownAggContext createContextFromProject(LogicalProject project, PushDownAggContext context) { + for (SlotReference slot : context.getGroupKeys()) { + if (!project.getOutputSet().contains(slot)) { + return false; + } + } + for (Slot slot : context.getAggFunctionsInputSlots()) { + if (!project.getOutputSet().contains(slot)) { + return false; + } + } + + // push sum(A) through project(x, x+y as 
A) + // if x is not used as group key, do not push through + for(Slot slot : context.getAggFunctionsInputSlots()) { + for (NamedExpression prj : project.getProjects()) { + if (prj instanceof Alias && prj.getExprId() == slot.getExprId()) { + if (prj.getInputSlots().stream() + .anyMatch( + s -> project.getOutputSet().contains(s) + && !context.getGroupKeys().contains(s))) { + return false; + } + } + } + } + + return true; + } + @Override public Plan visitLogicalProject(LogicalProject project, PushDownAggContext context) { if (project.child() instanceof LogicalCatalogRelation @@ -224,20 +254,8 @@ public Plan visitLogicalProject(LogicalProject project, PushDown return genAggregate(project, context); } - // check validation - // all slots in context are projected - List slotsInContext = context.getGroupKeys().stream() - .flatMap(e -> e.getInputSlots().stream()).collect(Collectors.toList()); - slotsInContext.addAll(context.getAggFunctionsInputSlots()); - for (Slot slot : slotsInContext) { - if (!project.getOutputSet().contains(slot)) { - if (SessionVariable.isFeDebug()) { - throw new RuntimeException("push down failed: " + slot + " is not in project \n" - + project.treeString()); - } else { - return project; - } - } + if (!canPushThroughProject(project, context)) { + return genAggregate(project, context); } PushDownAggContext newContext = createContextFromProject(project, context); @@ -264,7 +282,11 @@ public Plan visitLogicalProject(LogicalProject project, PushDown if (newChild.getOutputSet().containsAll(ne.getInputSlots())) { newProjections.add(ne); } else { - if (!(ne instanceof SlotReference && aggFuncInputSlots.contains((SlotReference) ne))) { + // if ne is not child output, it means ne is used by aggFunc + // push sum(a) through project(expr as a) + // "sum(expr)" is pushed, and newChild output x, x is alias of sum(expr) + // the new project should output x. 
+ if (!aggFuncInputSlots.contains(ne.toSlot())) { if (SessionVariable.isFeDebug()) { throw new RuntimeException("push down Agg failed: " + ne + " is not in project \n" + project.treeString()); From 59d57b5dfcfdc82f55e7104984a2fb18bb5456fd Mon Sep 17 00:00:00 2001 From: englefly Date: Thu, 8 Jan 2026 22:01:51 +0800 Subject: [PATCH 11/21] simple sum-if no union --- .../eageraggregation/PushDownAggregation.java | 45 +++++++++++----- .../data/nereids_p0/eager_agg/eager_agg.out | 19 +++++++ .../nereids_p0/eager_agg/eager_agg.groovy | 14 +++++ .../suites/nereids_p0/eager_agg/load.groovy | 52 +++++++++++++++++++ 4 files changed, 116 insertions(+), 14 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java index 7d5ab6e4c17b48..5722c03026aace 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -45,6 +45,8 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.Max; import org.apache.doris.nereids.trees.expressions.functions.agg.Min; import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.functions.scalar.If; +import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; @@ -64,6 +66,7 @@ import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** * push down aggregation @@ -106,33 +109,47 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte return agg; } + List groupKeys = new 
ArrayList<>(); + for (Expression groupKey : agg.getGroupByExpressions()) { + if (groupKey instanceof SlotReference) { + groupKeys.add((SlotReference) groupKey); + } else { + if (SessionVariable.isFeDebug()) { + throw new RuntimeException("PushDownAggregation failed: agg is not normalized\n " + + agg.treeString()); + } else { + return agg; + } + } + } + List aggFunctions = new ArrayList<>(); for (AggregateFunction aggFunction : agg.getAggregateFunctions()) { if (pushDownAggFunctionSet.contains(aggFunction.getClass()) && !aggFunction.isDistinct()) { - aggFunctions.add(aggFunction); + if(aggFunction instanceof Sum && ((Sum) aggFunction).child() instanceof If) { + If body = (If) ((Sum) aggFunction).child(); + aggFunctions.add(new Sum(body.getTrueValue())); + if (!(body.getFalseValue() instanceof NullLiteral)) { + aggFunctions.add(new Sum(body.getFalseValue())); + } + groupKeys.addAll(body.getCondition().getInputSlots() + .stream().map(slot -> (SlotReference) slot).collect(Collectors.toList())); + } else { + aggFunctions.add(aggFunction); + } } else { return agg; } } - + aggFunctions = aggFunctions.stream().distinct().collect(Collectors.toList()); if (!checkSubTreePattern(agg.child())) { return agg; } - List groupKeys = new ArrayList<>(); - for (Expression groupKey : agg.getGroupByExpressions()) { - if (groupKey instanceof SlotReference) { - groupKeys.add((SlotReference) groupKey); - } else { - if (SessionVariable.isFeDebug()) { - throw new RuntimeException("PushDownAggregation failed: agg is not normalized\n " - + agg.treeString()); - } else { - return agg; - } - } + if (!checkSubTreePattern(agg.child())) { + return agg; } PushDownAggContext pushDownContext = new PushDownAggContext(new ArrayList<>(aggFunctions), diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out index ad2c07d5f1da2f..f83fa7a2b7e6b1 100644 --- a/regression-test/data/nereids_p0/eager_agg/eager_agg.out +++ 
b/regression-test/data/nereids_p0/eager_agg/eager_agg.out @@ -142,3 +142,22 @@ Used: leading({ ss dt } ws ) UnUsed: SyntaxError: +-- !sum_if_push -- +cost = 21.726000000000003 +PhysicalResultSink[511] ( outputExprs=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] ) ++--PhysicalProject[507]@12 ( stats=1, projects=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] ) + +--PhysicalHashAggregate[503]@11 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[d_week_seq#60, ws_item_sk#3], outputExpr=[d_week_seq#60, ws_item_sk#3, sum(if((d_day_name#70 = 'Monday'), sum(ws_sales_price)#21, NULL)) AS `mon_sales`#84, sum(if((d_day_name#70 = 'Tuesday'), sum(ws_sales_price)#21, NULL)) AS `tue_sales`#85, sum(if((d_day_name#70 = 'Wednesday'), sum(ws_sales_price)#21, NULL)) AS `wed_sales`#86, sum(if((d_day_name#70 = 'Thursday'), sum(ws_sales_price)#21, NULL)) AS `thu_sales`#87, sum(if((d_day_name#70 = 'Friday'), sum(ws_sales_price)#21, NULL)) AS `fri_sales`#88, sum(if((d_day_name#70 = 'Saturday'), sum(ws_sales_price)#21, NULL)) AS `sat_sales`#89], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false ) + +--PhysicalProject[499]@10 ( stats=1, projects=[d_week_seq#60, ws_item_sk#3, sum(ws_sales_price)#21, d_day_name#70] ) + +--PhysicalHashJoin[495]@9 ( stats=1, type=INNER_JOIN, hashCondition=[(d_date_sk#56 = ws_sold_date_sk#0)], otherCondition=[], markCondition=[] ) + |--PhysicalProject[482]@6 ( stats=1, projects=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price)#21] ) + | +--PhysicalHashJoin[478]@5 ( stats=1, type=INNER_JOIN, hashCondition=[(ws_item_sk#3 = i_item_sk#34)], otherCondition=[], markCondition=[] ) + | |--PhysicalHashAggregate[465]@2 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[ws_sold_date_sk#0, ws_item_sk#3], outputExpr=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price#21) 
AS `sum(ws_sales_price)`#21], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false ) + | | +--PhysicalProject[461]@1 ( stats=1, projects=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21] ) + | | +--PhysicalOlapScan[web_sales]@0 ( stats=1, operativeSlots=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21], virtualColumns=[] ) + | +--PhysicalDistribute[474]@4 ( stats=1, distributionSpec=DistributionSpecReplicated ) + | +--PhysicalProject[470]@4 ( stats=1, projects=[i_item_sk#34] ) + | +--PhysicalOlapScan[item]@3 ( stats=1, operativeSlots=[i_item_sk#34], virtualColumns=[] ) + +--PhysicalDistribute[491]@8 ( stats=1, distributionSpec=DistributionSpecReplicated ) + +--PhysicalProject[487]@8 ( stats=1, projects=[d_date_sk#56, d_week_seq#60, d_day_name#70] ) + +--PhysicalOlapScan[date_dim]@7 ( stats=1, operativeSlots=[d_date_sk#56, d_week_seq#60, d_day_name#70], virtualColumns=[] ) + diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy index fe829bb86a121c..5473a8645c8ddd 100644 --- a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy +++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy @@ -105,4 +105,18 @@ suite("eager_agg") { and ss_item_sk = ws_item_sk group by dt.d_year, ss_hdemo_sk + d_moy """ + + qt_sum_if_push """ + explain physical plan + select d_week_seq, + sum(case when (d_day_name='Monday') then ws_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ws_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ws_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ws_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ws_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ws_sales_price else null end) sat_sales + from web_sales join item on ws_item_sk = i_item_sk + join date_dim on d_date_sk = 
ws_sold_date_sk + group by d_week_seq, ws_item_sk; + """ } diff --git a/regression-test/suites/nereids_p0/eager_agg/load.groovy b/regression-test/suites/nereids_p0/eager_agg/load.groovy index 2cb27d49d642da..42abc8d50a4a61 100644 --- a/regression-test/suites/nereids_p0/eager_agg/load.groovy +++ b/regression-test/suites/nereids_p0/eager_agg/load.groovy @@ -161,6 +161,37 @@ PROPERTIES ( "group_commit_data_bytes" = "134217728" ); +drop table if exists item; +CREATE TABLE `item` ( + `i_item_sk` bigint NULL, + `i_item_id` char(16) NULL, + `i_rec_start_date` date NULL, + `i_rec_end_date` date NULL, + `i_item_desc` varchar(200) NULL, + `i_current_price` decimal(7,2) NULL, + `i_wholesale_cost` decimal(7,2) NULL, + `i_brand_id` int NULL, + `i_brand` char(50) NULL, + `i_class_id` int NULL, + `i_class` char(50) NULL, + `i_category_id` int NULL, + `i_category` char(50) NULL, + `i_manufact_id` int NULL, + `i_manufact` char(50) NULL, + `i_size` char(20) NULL, + `i_formulation` char(20) NULL, + `i_color` char(20) NULL, + `i_units` char(10) NULL, + `i_container` char(10) NULL, + `i_manager_id` int NULL, + `i_product_name` char(50) NULL +) ENGINE=OLAP +DUPLICATE KEY(`i_item_sk`, `i_item_id`) +DISTRIBUTED BY HASH(`i_item_sk`) BUCKETS 3 +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1" +); + INSERT INTO store_sales ( ss_sold_date_sk, ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk, ss_hdemo_sk, ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity, @@ -208,6 +239,27 @@ INSERT INTO web_sales ( 3.47, 0.00, 5.00, 49.50, 52.97, 54.50, 58.00, 7.97 ); + +INSERT INTO item ( + i_item_sk, i_item_id, i_rec_start_date, i_rec_end_date, + i_item_desc, i_current_price, i_wholesale_cost, + i_brand_id, i_brand, i_class_id, i_class, + i_category_id, i_category, i_manufact_id, i_manufact, + i_size, i_formulation, i_color, i_units, i_container, + i_manager_id, i_product_name +) VALUES + (1001, 'ITEM-0001001', '2024-01-01', NULL, + 'Sample item 1001', 12.00, 
10.00, + 10, 'BrandA', 101, 'ClassA', + 201, 'CategoryA', 301, 'ManufactA', + 'M', 'Std', 'Red', 'EA', 'BOX', + 1, 'Product 1001'), + (2001, 'ITEM-0002001', '2024-01-01', NULL, + 'Sample item 2001', 18.00, 15.00, + 11, 'BrandB', 102, 'ClassB', + 202, 'CategoryB', 302, 'ManufactB', + 'L', 'Std', 'Blue', 'EA', 'BOX', + 2, 'Product 2001'); """ } From a4b959b038b4e1820e27824d6f8729e361795131 Mon Sep 17 00:00:00 2001 From: englefly Date: Thu, 8 Jan 2026 22:34:43 +0800 Subject: [PATCH 12/21] =?UTF-8?q?sum-if=20=E5=9F=BA=E6=9C=AC=E6=AC=BE=20(?= =?UTF-8?q?=E8=BF=98=E6=B2=A1=E6=9C=89=E6=94=AF=E6=8C=81union),=2043=20?= =?UTF-8?q?=E6=9C=89=E6=8F=90=E5=8D=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../eageraggregation/EagerAggRewriter.java | 26 ++- .../eageraggregation/PushDownAggContext.java | 1 - .../eageraggregation/PushDownAggregation.java | 51 ++++-- .../PushdownSumIfAggregation.java | 6 +- .../expressions/functions/BoundFunction.java | 4 + .../doris/nereids/util/ExpressionUtils.java | 166 ++++++++++++------ .../org/apache/doris/qe/SessionVariable.java | 1 + .../data/nereids_p0/eager_agg/eager_agg.out | 47 +++-- .../tpcds_sf1000/shape/query43.out | 9 +- .../tpcds_sf1000_nopkfk/shape/query43.out | 9 +- .../nereids_p0/eager_agg/eager_agg.groovy | 84 +++++++++ 11 files changed, 315 insertions(+), 89 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java index 0cd3785304b958..32c5ac707a9444 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.rules.rewrite.eageraggregation; +import 
org.apache.doris.nereids.rules.analysis.CheckAfterRewrite; import org.apache.doris.nereids.rules.analysis.NormalizeAggregate; import org.apache.doris.nereids.rules.rewrite.StatsDerive; import org.apache.doris.nereids.stats.ExpressionEstimation; @@ -34,6 +35,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.qe.SessionVariable; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Statistics; @@ -67,6 +69,7 @@ public class EagerAggRewriter extends DefaultPlanRewriter { private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000; private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100; private final StatsDerive derive = new StatsDerive(false); + private CheckAfterRewrite checker = new CheckAfterRewrite(); @Override public Plan visitLogicalJoin(LogicalJoin join, PushDownAggContext context) { @@ -144,12 +147,14 @@ public Plan visitLogicalJoin(LogicalJoin join, P } if (toLeft) { Plan newLeft = join.left().accept(this, childContext); + checker.checkTreeAllSlotReferenceFromChildren(newLeft); if (newLeft != join.left()) { return join.withChildren(newLeft, join.right()); } } else { Plan newRight = join.right().accept(this, childContext); if (newRight != join.right()) { + checker.checkTreeAllSlotReferenceFromChildren(newRight); return join.withChildren(join.left(), newRight); } } @@ -224,7 +229,7 @@ private boolean canPushThroughProject(LogicalProject project, Pu // push sum(A) through project(x, x+y as A) // if x is not used as group key, do not push through - for(Slot slot : context.getAggFunctionsInputSlots()) { + for (Slot slot : context.getAggFunctionsInputSlots()) { for (NamedExpression prj : project.getProjects()) { if (prj instanceof Alias && prj.getExprId() == 
slot.getExprId()) { if (prj.getInputSlots().stream() @@ -260,6 +265,7 @@ public Plan visitLogicalProject(LogicalProject project, PushDown PushDownAggContext newContext = createContextFromProject(project, context); Plan newChild = project.child().accept(this, newContext); + checker.checkTreeAllSlotReferenceFromChildren(newChild); if (newChild != project.child()) { /* * agg[sum(a), groupBy(b)] @@ -305,12 +311,28 @@ public Plan visitLogicalProject(LogicalProject project, PushDown } } - return project.withProjectsAndChild(newProjections, newChild); + return project.withProjectsAndChild( + newProjections.stream().map(e -> (NamedExpression) replaceBySlots(e, newChild.getOutput())) + .collect(Collectors.toList()), + newChild); } return project; } + private static Expression replaceBySlots(Expression expression, List slots) { + Map replaceMap = new HashMap<>(); + for (Slot slot1 : expression.getInputSlots()) { + for (Slot slot2 : slots) { + if (slot1.getExprId().asInt() == slot2.getExprId().asInt()) { + replaceMap.put(slot1, slot2); + } + } + } + Expression result = ExpressionUtils.replace(expression, replaceMap); + return result; + } + @Override public Plan visitLogicalAggregate(LogicalAggregate agg, PushDownAggContext context) { return agg; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java index 8ee05609733d3b..181a7000e4d371 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java @@ -27,7 +27,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java index 5722c03026aace..e31c8c2e6ce01f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java @@ -37,9 +37,9 @@ import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.rules.analysis.NormalizeAggregate; import org.apache.doris.nereids.rules.rewrite.AdjustNullable; -import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.expressions.functions.agg.Max; @@ -56,6 +56,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.qe.SessionVariable; import com.google.common.collect.Sets; @@ -63,7 +64,9 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -128,7 +131,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte for (AggregateFunction aggFunction : agg.getAggregateFunctions()) { if (pushDownAggFunctionSet.contains(aggFunction.getClass()) && !aggFunction.isDistinct()) { - 
if(aggFunction instanceof Sum && ((Sum) aggFunction).child() instanceof If) { + if (aggFunction instanceof Sum && ((Sum) aggFunction).child() instanceof If) { If body = (If) ((Sum) aggFunction).child(); aggFunctions.add(new Sum(body.getTrueValue())); if (!(body.getFalseValue() instanceof NullLiteral)) { @@ -144,6 +147,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte } } aggFunctions = aggFunctions.stream().distinct().collect(Collectors.toList()); + groupKeys = groupKeys.stream().distinct().collect(Collectors.toList()); if (!checkSubTreePattern(agg.child())) { return agg; } @@ -168,20 +172,43 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte // ->scan(T1[A...]) // ->scan(T2) List newOutputExpressions = new ArrayList<>(); + Map replaceMap = new HashMap<>(); + for (Expression x : pushDownContext.getAliasMap().keySet()) { + replaceMap.put(x.child(0), pushDownContext.getAliasMap().get(x).toSlot()); + } + for (NamedExpression ne : agg.getOutputExpressions()) { if (ne instanceof SlotReference) { newOutputExpressions.add(ne); + //} else if (ne instanceof Alias + // && ne.child(0) instanceof Sum + // && ne.child(0).child(0) instanceof If + // && ne.child(0).child(0).child(1) instanceof SlotReference) { + // SlotReference targetSlot = (SlotReference) ne.child(0).child(0).child(1); + // Slot toReplace = null; + // for (Slot slot : child.getOutput()) { + // if (slot.getExprId().equals(targetSlot.getExprId())) { + // toReplace = slot; + // } + // } + // if (toReplace != null) { + // Alias newOutput = (Alias) ((Alias) ne).withChildren( + // new Sum( + // new If( + // ne.child(0).child(0).child(0), + // toReplace, + // new NullLiteral(toReplace.getDataType()) + // ) + // ) + // ); + // newOutputExpressions.add(newOutput); + // } else { + // return agg; + // } + } else { - NamedExpression replaceAliasExpr = (NamedExpression) ne - .rewriteDownShortCircuit(e -> { - Alias alias = pushDownContext.getAliasMap().get(e); - if (alias != null) { - 
AggregateFunction aggFunction = (AggregateFunction) e; - return aggFunction.withChildren(alias.toSlot()); - } else { - return e; - } - }); + NamedExpression replaceAliasExpr = (NamedExpression) ExpressionUtils.replace(ne, replaceMap); + replaceAliasExpr = (NamedExpression) ExpressionUtils.rebuildSignature(replaceAliasExpr); newOutputExpressions.add(replaceAliasExpr); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java index 57465c5f982e0d..8f2f127df80a58 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.rules.rewrite.eageraggregation; import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; @@ -47,7 +48,8 @@ public class PushdownSumIfAggregation extends DefaultPlanRewriter im @Override public Plan rewriteRoot(Plan plan, JobContext jobContext) { - return plan.accept(this, jobContext); + return plan; + //return plan.accept(this, jobContext); } @Override @@ -111,6 +113,8 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte SumAggContext sumAggContext = new SumAggContext(aliasToBePushDown, ifConditions, ifThenSlots, groupKeys); SumAggWriter writer = new SumAggWriter(); Plan child = agg.child().accept(writer, sumAggContext); + CheckAfterRewrite checker = new CheckAfterRewrite(); + checker.checkTreeAllSlotReferenceFromChildren(child); if (child != agg.child()) { List outputExpressions = 
agg.getOutputExpressions(); List newOutputExpressions = new ArrayList<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java index 16d7740cf9c2a4..ea472071678e8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java @@ -155,4 +155,8 @@ private Supplier buildSignatureCache(Supplier optionalAnd(Collection collection } /** - * AND / OR expression, also remove duplicate expression, boolean literal + * Rebuild expression tree and refresh BoundFunction signatures. + * If an expression is a BoundFunction, recreate it with rebuilt children and + * reset its signature. + * Other expressions are recreated only when children change. + * + * @return rebuilt expression (may be the same instance when unchanged and + * non-BoundFunction) + */ + public static Expression rebuildSignature(Expression expr) { + List newChildren = expr.children().stream() + .map(ExpressionUtils::rebuildSignature) + .collect(Collectors.toList()); + return MoreFieldsThread.keepFunctionSignature(false, + () -> { + boolean childrenUnchanged = true; + List originChildren = expr.children(); + if (originChildren.size() != newChildren.size()) { + childrenUnchanged = false; + } else { + for (int i = 0; i < originChildren.size(); i++) { + if (originChildren.get(i) != newChildren.get(i)) { + childrenUnchanged = false; + break; + } + } + } + + if (expr instanceof BoundFunction) { + BoundFunction fn = (BoundFunction) expr; + BoundFunction rebuilt = (BoundFunction) fn.withChildren(newChildren); + if (!fn.getDataType().equals(rebuilt.getDataType())) { + return new Cast(rebuilt, fn.getDataType()); + } + return rebuilt; + } + + if (childrenUnchanged) { + return expr; + } + return 
expr.withChildren(newChildren); + }); + + } + + /** + * AND / OR expression, also remove duplicate expression, boolean literal */ public static Expression compound(boolean isAnd, Collection expressions) { return isAnd ? and(expressions) : or(expressions); } /** - * AND expression, also remove duplicate expression, boolean literal + * AND expression, also remove duplicate expression, boolean literal */ public static Expression and(Collection expressions) { if (expressions.size() == 1) { @@ -234,7 +280,7 @@ public static Expression and(Collection expressions) { } /** - * AND expression, also remove duplicate expression, boolean literal + * AND expression, also remove duplicate expression, boolean literal */ public static Expression and(Expression... expressions) { return and(Lists.newArrayList(expressions)); @@ -249,14 +295,14 @@ public static Optional optionalOr(List expressions) { } /** - * OR expression, also remove duplicate expression, boolean literal + * OR expression, also remove duplicate expression, boolean literal */ public static Expression or(Expression... expressions) { return or(Lists.newArrayList(expressions)); } /** - * OR expression, also remove duplicate expression, boolean literal + * OR expression, also remove duplicate expression, boolean literal */ public static Expression or(Collection expressions) { if (expressions.size() == 1) { @@ -319,7 +365,8 @@ public static List shuttleExpressionWithLineage(List S selectMinimumColumn(Collection sl } /** - * Check whether the input expression is a {@link org.apache.doris.nereids.trees.expressions.Slot} - * or at least one {@link Cast} on a {@link org.apache.doris.nereids.trees.expressions.Slot} + * Check whether the input expression is a + * {@link org.apache.doris.nereids.trees.expressions.Slot} + * or at least one {@link Cast} on a + * {@link org.apache.doris.nereids.trees.expressions.Slot} *

* for example: * - SlotReference to a column: @@ -378,7 +427,8 @@ public static S selectMinimumColumn(Collection sl * cast(cast(int_col as long) as string) * * @param expr input expression - * @return Return Optional[ExprId] of underlying slot reference if input expression is a slot or cast on slot. + * @return Return Optional[ExprId] of underlying slot reference if input + * expression is a slot or cast on slot. * Otherwise, return empty optional result. */ public static Optional isSlotOrCastOnSlot(Expression expr) { @@ -386,8 +436,10 @@ public static Optional isSlotOrCastOnSlot(Expression expr) { } /** - * Check whether the input expression is a {@link org.apache.doris.nereids.trees.expressions.Slot} - * or at least one {@link Cast} on a {@link org.apache.doris.nereids.trees.expressions.Slot} + * Check whether the input expression is a + * {@link org.apache.doris.nereids.trees.expressions.Slot} + * or at least one {@link Cast} on a + * {@link org.apache.doris.nereids.trees.expressions.Slot} */ public static Optional extractSlotOrCastOnSlot(Expression expr) { while (expr instanceof Cast) { @@ -402,7 +454,8 @@ public static Optional extractSlotOrCastOnSlot(Expression expr) { } /** - * Generate replaceMap Slot -> Expression from NamedExpression[Expression as name] + * Generate replaceMap Slot -> Expression from NamedExpression[Expression as + * name] */ public static Map generateReplaceMap(List namedExpressions) { Map replaceMap = Maps.newLinkedHashMapWithExpectedSize(namedExpressions.size()); @@ -430,7 +483,8 @@ public static NamedExpression replaceNameExpression(NamedExpression expr, } /** - * Replace expression node with predicate in the expression tree by `replaceMap` in top-down manner. + * Replace expression node with predicate in the expression tree by `replaceMap` + * in top-down manner. 
*/ public static Expression replaceIf(Expression expr, Map replaceMap, Predicate predicate, boolean stopWhenNotMatched) { @@ -460,9 +514,11 @@ public static List replaceWithCounter(List exprs, } /** - * Replace expression node in the expression tree by `replaceMap` in top-down manner. + * Replace expression node in the expression tree by `replaceMap` in top-down + * manner. * This function gives counter map to record replace count. * For example. + * *

      * input expression: a > 1
      * replaceMap: a -> b + c
@@ -489,8 +545,10 @@ public static Expression replaceWithCounter(Expression expr,
     }
 
     /**
-     * Replace expression node in the expression tree by `replaceMap` in top-down manner.
+     * Replace expression node in the expression tree by `replaceMap` in top-down
+     * manner.
      * For example.
+     *
      * 
      * input expression: a > 1
      * replaceMap: a -> b + c
@@ -507,8 +565,10 @@ public static Expression replace(Expression expr, Map
      * input expression: a > 1
      * replaceMap: d -> b + c, transferMap: a -> d
@@ -562,7 +622,8 @@ public static Expression replaceNullAware(Expression expr,
             Expression replacedExpr = replaceMap.get(e);
             if (replacedExpr == null && e instanceof SlotReference
                     && e.getDataType() instanceof VariantType) {
-                // this is valid, because the variant expression would be extended in expression rewrite
+                // this is valid, because the variant expression would be extended in expression
+                // rewrite
                 return e;
             }
             if (replacedExpr == null && e instanceof NamedExpression) {
@@ -576,7 +637,8 @@ public static Expression replaceNullAware(Expression expr,
     }
 
     /**
-     * Replace expression node in the expression tree by `replaceMap` in top-down manner.
+     * Replace expression node in the expression tree by `replaceMap` in top-down
+     * manner.
      */
     public static List replaceNamedExpressions(List namedExpressions,
             Map replaceMap) {
@@ -596,8 +658,8 @@ public static List replaceNamedExpressions(List
-                e instanceof UniqueFunction ? ((UniqueFunction) e).withIgnoreUniqueId(ignoreUniqueId) : e);
+        return expression.rewriteDownShortCircuit(
+                e -> e instanceof UniqueFunction ? ((UniqueFunction) e).withIgnoreUniqueId(ignoreUniqueId) : e);
     }
 
     public static  List rewriteDownShortCircuit(
@@ -665,17 +727,18 @@ public static boolean hasNullLiteral(List children) {
     public static boolean canInferNotNullForMarkSlot(Expression predicate, ExpressionRewriteContext ctx) {
         /*
          * assume predicate is from LogicalFilter
-         * the idea is replacing each mark join slot with null and false literal then run FoldConstant rule
+         * the idea is replacing each mark join slot with null and false literal then
+         * run FoldConstant rule
          * if the evaluate result are:
          * 1. all true
-         * 2. all null and false (in logicalFilter, we discard both null and false values)
+         * 2. all null and false (in logicalFilter, we discard both null and false
+         * values)
          * the mark slot can be non-nullable boolean
          * and in semi join, we can safely change the mark conjunct to hash conjunct
          */
-        ImmutableList literals =
-                ImmutableList.of(NullLiteral.BOOLEAN_INSTANCE, BooleanLiteral.FALSE);
-        List markJoinSlotReferenceList =
-                new ArrayList<>((predicate.collect(MarkJoinSlotReference.class::isInstance)));
+        ImmutableList literals = ImmutableList.of(NullLiteral.BOOLEAN_INSTANCE, BooleanLiteral.FALSE);
+        List markJoinSlotReferenceList = new ArrayList<>(
+                (predicate.collect(MarkJoinSlotReference.class::isInstance)));
         int markSlotSize = markJoinSlotReferenceList.size();
         int maxMarkSlotCount = 4;
         // if the conjunct has mark slot, and maximum 4 mark slots(for performance)
@@ -684,9 +747,9 @@ public static boolean canInferNotNullForMarkSlot(Expression predicate, Expressio
             boolean meetTrue = false;
             boolean meetNullOrFalse = false;
             /*
-             * markSlotSize = 1 -> loopCount = 2  ---- 0, 1
-             * markSlotSize = 2 -> loopCount = 4  ---- 00, 01, 10, 11
-             * markSlotSize = 3 -> loopCount = 8  ---- 000, 001, 010, 011, 100, 101, 110, 111
+             * markSlotSize = 1 -> loopCount = 2 ---- 0, 1
+             * markSlotSize = 2 -> loopCount = 4 ---- 00, 01, 10, 11
+             * markSlotSize = 3 -> loopCount = 8 ---- 000, 001, 010, 011, 100, 101, 110, 111
              * markSlotSize = 4 -> loopCount = 16 ---- 0000, 0001, ... 1111
              */
             int loopCount = 1 << markSlotSize;
@@ -702,8 +765,7 @@ public static boolean canInferNotNullForMarkSlot(Expression predicate, Expressio
                 }
                 Expression evalResult = FoldConstantRule.evaluate(
                         ExpressionUtils.replace(predicate, replaceMap),
-                        ctx
-                );
+                        ctx);
 
                 if (evalResult.equals(BooleanLiteral.TRUE)) {
                     if (meetNullOrFalse) {
@@ -742,8 +804,7 @@ public static Set inferNotNullSlots(Set predicates, CascadesCo
                 replaceMap.put(slot, nullLiteral);
                 Expression evalExpr = FoldConstantRule.evaluate(
                         ExpressionUtils.replace(predicate, replaceMap),
-                        new ExpressionRewriteContext(cascadesContext)
-                );
+                        new ExpressionRewriteContext(cascadesContext));
                 if (evalExpr.isNullLiteral() || BooleanLiteral.FALSE.equals(evalExpr)) {
                     notNullSlots.add(slot);
                 }
@@ -764,7 +825,8 @@ public static Set inferNotNull(Set predicates, CascadesC
     }
 
     /**
-     * infer notNulls slot from predicate but these slots must be in the given slots.
+     * infer notNulls slot from predicate but these slots must be in the given
+     * slots.
      */
     public static Set inferNotNull(Set predicates, Set slots,
             CascadesContext cascadesContext) {
@@ -919,7 +981,7 @@ public static boolean isInjective(Expression expression) {
         return expression instanceof Slot;
     }
 
-    // if the input is unique,  the output of agg is unique, too
+    // if the input is unique, the output of agg is unique, too
     public static boolean isInjectiveAgg(Expression agg) {
         return agg instanceof Sum || agg instanceof Avg || agg instanceof Max || agg instanceof Min;
     }
@@ -937,7 +999,8 @@ public static  Set mutableCollect(List expressions,
     public static  List collectAll(Collection expressions,
             Predicate> predicate) {
         switch (expressions.size()) {
-            case 0: return ImmutableList.of();
+            case 0:
+                return ImmutableList.of();
             default: {
                 ImmutableList.Builder result = ImmutableList.builder();
                 for (Expression expr : expressions) {
@@ -1034,7 +1097,8 @@ public static Expression getSingleNumericSlotOrExpressionCoveredByCast(Expressio
         }
         // for other datatype, only support cast.
         // example: T1 join T2 on subStr(T1.a, 1,4) = subStr(T2.a, 1,4)
-        // the cost of subStr is too high, and hence we do not generate RF subStr(T2.a, 1,4)->subStr(T1.a, 1,4)
+        // the cost of subStr is too high, and hence we do not generate RF subStr(T2.a,
+        // 1,4)->subStr(T1.a, 1,4)
         while (expression instanceof Cast) {
             expression = ((Cast) expression).child();
         }
@@ -1046,18 +1110,18 @@ public static Expression getSingleNumericSlotOrExpressionCoveredByCast(Expressio
      */
     public static boolean checkSlotConstant(Slot slot, Set predicates) {
         return predicates.stream().anyMatch(predicate -> {
-                    if (predicate instanceof EqualTo) {
-                        EqualTo equalTo = (EqualTo) predicate;
-                        return (equalTo.left() instanceof Literal && equalTo.right().equals(slot))
-                                || (equalTo.right() instanceof Literal && equalTo.left().equals(slot));
-                    }
-                    return false;
-                }
-        );
+            if (predicate instanceof EqualTo) {
+                EqualTo equalTo = (EqualTo) predicate;
+                return (equalTo.left() instanceof Literal && equalTo.right().equals(slot))
+                        || (equalTo.right() instanceof Literal && equalTo.left().equals(slot));
+            }
+            return false;
+        });
     }
 
     /**
-     * Check the expression is inferred or not, if inferred return true, nor return false
+     * Check the expression is inferred or not, if inferred return true, nor return
+     * false
      */
     public static boolean isInferred(Expression expression) {
         return expression.accept(new DefaultExpressionVisitor() {
@@ -1171,8 +1235,7 @@ public static Literal analyzeAndFoldToLiteral(ConnectContext ctx, Expression exp
         }
         ExpressionRewriteContext context = new ExpressionRewriteContext(cascadesContext);
         ExpressionRuleExecutor executor = new ExpressionRuleExecutor(ImmutableList.of(
-                ExpressionRewrite.bottomUp(ReplaceVariableByLiteral.INSTANCE)
-        ));
+                ExpressionRewrite.bottomUp(ReplaceVariableByLiteral.INSTANCE)));
         Expression rewrittenExpression = executor.rewrite(analyzedExpr, context);
         Expression foldExpression = FoldConstantRule.evaluate(rewrittenExpression, context);
         if (foldExpression instanceof Literal) {
@@ -1247,8 +1310,8 @@ public static boolean containsCaseWhenLikeType(Expression expression) {
     public static Optional> getCaseWhenLikeBranchResults(Expression expression) {
         if (expression instanceof CaseWhen) {
             CaseWhen caseWhen = (CaseWhen) expression;
-            ImmutableList.Builder builder
-                    = ImmutableList.builderWithExpectedSize(caseWhen.getWhenClauses().size() + 1);
+            ImmutableList.Builder builder = ImmutableList
+                    .builderWithExpectedSize(caseWhen.getWhenClauses().size() + 1);
             for (WhenClause whenClause : caseWhen.getWhenClauses()) {
                 builder.add(whenClause.getResult());
             }
@@ -1288,7 +1351,8 @@ public static boolean hasNonWindowAggregateFunction(Expression expression) {
     }
 
     /**
-     * check if the expressions contain a unique function which exists multiple times
+     * check if the expressions contain a unique function which exists multiple
+     * times
      */
     public static boolean containUniqueFunctionExistMultiple(Collection expressions) {
         Set counterSet = Sets.newHashSet();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 373b92daa39991..3a7fccc8380ea1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2203,6 +2203,7 @@ public static int getEagerAggregationMode() {
 
     @VariableMgr.VarAttr(name = "eager_aggregation_on_join", needForward = true)
     public boolean eagerAggregationOnJoin = false;
+
     public static boolean isEagerAggregationOnJoin() {
         if (ConnectContext.get() != null) {
             return ConnectContext.get().getSessionVariable().eagerAggregationOnJoin;
diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
index f83fa7a2b7e6b1..c3a9ec7b1c5bc7 100644
--- a/regression-test/data/nereids_p0/eager_agg/eager_agg.out
+++ b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
@@ -23,6 +23,8 @@ Used: leading({ ss ws } dt )
 UnUsed:
 SyntaxError:
 
+-- !a_exe --
+
 -- !a2 --
 PhysicalResultSink
 --hashAgg[GLOBAL]
@@ -47,6 +49,8 @@ Used: leading({ ss ws } dt )
 UnUsed:
 SyntaxError:
 
+-- !a2_exe --
+
 -- !sum_min_max --
 PhysicalResultSink
 --hashAgg[GLOBAL]
@@ -71,6 +75,8 @@ Used: leading({ ss ws } dt )
 UnUsed:
 SyntaxError:
 
+-- !sum_min_max_exe --
+
 -- !avg_count --
 PhysicalResultSink
 --hashAgg[GLOBAL]
@@ -92,6 +98,8 @@ Used: leading({ ss ws } dt )
 UnUsed:
 SyntaxError:
 
+-- !avg_count_exe --
+
 -- !groupkey_push_SS_JOIN_D --
 PhysicalResultSink
 --PhysicalProject
@@ -117,6 +125,8 @@ Used: leading({ ss dt } ws )
 UnUsed:
 SyntaxError:
 
+-- !groupkey_push_SS_JOIN_D_exe --
+
 -- !groupkey_push --
 PhysicalResultSink
 --PhysicalProject
@@ -142,22 +152,27 @@ Used: leading({ ss dt } ws )
 UnUsed:
 SyntaxError:
 
+-- !groupkey_push_exe --
+
 -- !sum_if_push --
-cost = 21.726000000000003
-PhysicalResultSink[511] ( outputExprs=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] )
-+--PhysicalProject[507]@12 ( stats=1, projects=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] )
-   +--PhysicalHashAggregate[503]@11 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[d_week_seq#60, ws_item_sk#3], outputExpr=[d_week_seq#60, ws_item_sk#3, sum(if((d_day_name#70 = 'Monday'), sum(ws_sales_price)#21, NULL)) AS `mon_sales`#84, sum(if((d_day_name#70 = 'Tuesday'), sum(ws_sales_price)#21, NULL)) AS `tue_sales`#85, sum(if((d_day_name#70 = 'Wednesday'), sum(ws_sales_price)#21, NULL)) AS `wed_sales`#86, sum(if((d_day_name#70 = 'Thursday'), sum(ws_sales_price)#21, NULL)) AS `thu_sales`#87, sum(if((d_day_name#70 = 'Friday'), sum(ws_sales_price)#21, NULL)) AS `fri_sales`#88, sum(if((d_day_name#70 = 'Saturday'), sum(ws_sales_price)#21, NULL)) AS `sat_sales`#89], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false )
-      +--PhysicalProject[499]@10 ( stats=1, projects=[d_week_seq#60, ws_item_sk#3, sum(ws_sales_price)#21, d_day_name#70] )
-         +--PhysicalHashJoin[495]@9 ( stats=1, type=INNER_JOIN, hashCondition=[(d_date_sk#56 = ws_sold_date_sk#0)], otherCondition=[], markCondition=[] )
-            |--PhysicalProject[482]@6 ( stats=1, projects=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price)#21] )
-            |  +--PhysicalHashJoin[478]@5 ( stats=1, type=INNER_JOIN, hashCondition=[(ws_item_sk#3 = i_item_sk#34)], otherCondition=[], markCondition=[] )
-            |     |--PhysicalHashAggregate[465]@2 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[ws_sold_date_sk#0, ws_item_sk#3], outputExpr=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price#21) AS `sum(ws_sales_price)`#21], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false )
-            |     |  +--PhysicalProject[461]@1 ( stats=1, projects=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21] )
-            |     |     +--PhysicalOlapScan[web_sales]@0 ( stats=1, operativeSlots=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21], virtualColumns=[] )
-            |     +--PhysicalDistribute[474]@4 ( stats=1, distributionSpec=DistributionSpecReplicated )
-            |        +--PhysicalProject[470]@4 ( stats=1, projects=[i_item_sk#34] )
-            |           +--PhysicalOlapScan[item]@3 ( stats=1, operativeSlots=[i_item_sk#34], virtualColumns=[] )
-            +--PhysicalDistribute[491]@8 ( stats=1, distributionSpec=DistributionSpecReplicated )
-               +--PhysicalProject[487]@8 ( stats=1, projects=[d_date_sk#56, d_week_seq#60, d_day_name#70] )
+cost = 28.726000000000003
+PhysicalResultSink[586] ( outputExprs=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] )
++--PhysicalProject[582]@12 ( stats=1, projects=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] )
+   +--PhysicalHashAggregate[578]@11 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[d_week_seq#60, ws_item_sk#3], outputExpr=[d_week_seq#60, ws_item_sk#3, sum(if((d_day_name#70 = 'Monday'), sum(ws_sales_price)#21, NULL)) AS `mon_sales`#84, sum(if((d_day_name#70 = 'Tuesday'), sum(ws_sales_price)#21, NULL)) AS `tue_sales`#85, sum(if((d_day_name#70 = 'Wednesday'), sum(ws_sales_price)#21, NULL)) AS `wed_sales`#86, sum(if((d_day_name#70 = 'Thursday'), sum(ws_sales_price)#21, NULL)) AS `thu_sales`#87, sum(if((d_day_name#70 = 'Friday'), sum(ws_sales_price)#21, NULL)) AS `fri_sales`#88, sum(if((d_day_name#70 = 'Saturday'), sum(ws_sales_price)#21, NULL)) AS `sat_sales`#89], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false )
+      +--PhysicalProject[574]@10 ( stats=2, projects=[d_week_seq#60, ws_item_sk#3, sum(ws_sales_price)#21, d_day_name#70] )
+         +--PhysicalHashJoin[570]@9 ( stats=2, type=INNER_JOIN, hashCondition=[(d_date_sk#56 = ws_sold_date_sk#0)], otherCondition=[], markCondition=[] )
+            |--PhysicalProject[557]@6 ( stats=2, projects=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price)#21] )
+            |  +--PhysicalHashJoin[553]@5 ( stats=2, type=INNER_JOIN, hashCondition=[(ws_item_sk#3 = i_item_sk#34)], otherCondition=[], markCondition=[] )
+            |     |--PhysicalProject[536]@4 ( stats=2, projects=[i_item_sk#34] )
+            |     |  +--PhysicalOlapScan[item]@3 ( stats=2, operativeSlots=[i_item_sk#34], virtualColumns=[] )
+            |     +--PhysicalDistribute[549]@2 ( stats=1, distributionSpec=DistributionSpecReplicated )
+            |        +--PhysicalHashAggregate[545]@2 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[ws_sold_date_sk#0, ws_item_sk#3], outputExpr=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price#21) AS `sum(ws_sales_price)`#21], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false )
+            |           +--PhysicalProject[541]@1 ( stats=1, projects=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21] )
+            |              +--PhysicalOlapScan[web_sales]@0 ( stats=1, operativeSlots=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21], virtualColumns=[] )
+            +--PhysicalDistribute[566]@8 ( stats=1, distributionSpec=DistributionSpecReplicated )
+               +--PhysicalProject[562]@8 ( stats=1, projects=[d_date_sk#56, d_week_seq#60, d_day_name#70] )
                   +--PhysicalOlapScan[date_dim]@7 ( stats=1, operativeSlots=[d_date_sk#56, d_week_seq#60, d_day_name#70], virtualColumns=[] )
 
+-- !sum_if_push --
+1	\N	\N	\N	\N	\N	\N
+
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query43.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query43.out
index 37ab89010ef0a9..38ee41c557e4dd 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query43.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query43.out
@@ -10,9 +10,12 @@ PhysicalResultSink
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------filter((date_dim.d_year = 2000))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out
index 37ab89010ef0a9..38ee41c557e4dd 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query43.out
@@ -10,9 +10,12 @@ PhysicalResultSink
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------filter((date_dim.d_year = 2000))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
index 5473a8645c8ddd..29a859786759cd 100644
--- a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
+++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
@@ -35,6 +35,18 @@ suite("eager_agg") {
     group by dt.d_year
     """
 
+   qt_a_exe"""
+    select /*+leading({ss ws} dt)*/ dt.d_year 
+       ,sum(ws_list_price) brand
+       ,sum(ss_sales_price) sum_agg
+    from  date_dim dt 
+        ,store_sales ss
+        ,web_sales ws
+    where dt.d_date_sk = ss_sold_date_sk
+    and ss_item_sk = ws_item_sk
+    group by dt.d_year
+    """
+
     // push to ss-join-ws
     qt_a2 """
     explain shape plan
@@ -49,6 +61,18 @@ suite("eager_agg") {
     group by dt.d_year
     """
 
+    qt_a2_exe """
+    select /*+leading({ss ws} dt)*/ dt.d_year 
+       ,sum(ws_list_price + ss_sales_price) brand
+
+    from  date_dim dt 
+        ,store_sales ss
+        ,web_sales ws
+    where dt.d_date_sk = ss_sold_date_sk
+    and ss_item_sk = ws_item_sk
+    group by dt.d_year
+    """
+    
     // push sum/min/max aggFunc
     qt_sum_min_max """
     explain shape plan
@@ -64,6 +88,18 @@ suite("eager_agg") {
     group by dt.d_year
     """
 
+    qt_sum_min_max_exe """
+    select /*+leading({ss ws} dt)*/ dt.d_year 
+       ,sum(ws_list_price) brand
+       ,min(ss_sales_price) min_agg
+       ,max(ss_sales_price) max_agg
+    from  date_dim dt 
+        ,store_sales ss
+        ,web_sales ws
+    where dt.d_date_sk = ss_sold_date_sk
+    and ss_item_sk = ws_item_sk
+    group by dt.d_year
+    """
 
     // do not push avg/count aggFunc
     qt_avg_count """
@@ -78,6 +114,17 @@ suite("eager_agg") {
     group by dt.d_year
     """
 
+    qt_avg_count_exe """
+    select /*+leading({ss ws} dt)*/ dt.d_year 
+       ,avg(ws_list_price) 
+    from  date_dim dt 
+        ,store_sales ss
+        ,web_sales ws
+    where dt.d_date_sk = ss_sold_date_sk
+    and ss_item_sk = ws_item_sk
+    group by dt.d_year
+    """
+
     // agg push to ss-d
     qt_groupkey_push_SS_JOIN_D """
     explain shape plan
@@ -92,6 +139,18 @@ suite("eager_agg") {
     group by dt.d_year, ss_hdemo_sk + ws_quantity
     """
 
+    qt_groupkey_push_SS_JOIN_D_exe """
+    select /*+leading({ss dt} ws)*/  dt.d_year 
+        ,sum(ss_wholesale_cost) brand
+        ,sum(ss_sales_price + d_moy) sum_agg
+    from  store_sales ss
+        join date_dim dt
+        join web_sales ws
+    where dt.d_date_sk = ss_sold_date_sk
+    and ss_item_sk = ws_item_sk
+    group by dt.d_year, ss_hdemo_sk + ws_quantity
+    """
+
     // group key: ss_hdemo_sk + d_moy => push to ss-d
     qt_groupkey_push """
     explain shape plan
@@ -106,6 +165,18 @@ suite("eager_agg") {
     group by dt.d_year, ss_hdemo_sk + d_moy
     """
 
+    qt_groupkey_push_exe """
+    select /*+leading({ss dt} ws)*/  dt.d_year 
+        ,sum(ss_wholesale_cost) brand
+        ,sum(ss_sales_price) sum_agg
+    from  store_sales ss
+        join date_dim dt
+        join web_sales ws
+    where dt.d_date_sk = ss_sold_date_sk
+    and ss_item_sk = ws_item_sk
+    group by dt.d_year, ss_hdemo_sk + d_moy
+    """
+
     qt_sum_if_push """
         explain physical plan
         select d_week_seq,
@@ -119,4 +190,17 @@ suite("eager_agg") {
                         join date_dim on d_date_sk = ws_sold_date_sk
         group by d_week_seq, ws_item_sk;
         """
+
+    qt_sum_if_push """
+        select d_week_seq,
+                sum(case when (d_day_name='Monday') then ws_sales_price else null end) mon_sales,
+                sum(case when (d_day_name='Tuesday') then ws_sales_price else  null end) tue_sales,
+                sum(case when (d_day_name='Wednesday') then ws_sales_price else null end) wed_sales,
+                sum(case when (d_day_name='Thursday') then ws_sales_price else null end) thu_sales,
+                sum(case when (d_day_name='Friday') then ws_sales_price else null end) fri_sales,
+                sum(case when (d_day_name='Saturday') then ws_sales_price else null end) sat_sales
+        from web_sales join item on ws_item_sk = i_item_sk
+                        join date_dim on d_date_sk = ws_sold_date_sk
+        group by d_week_seq, ws_item_sk;
+        """
 }

From c57c90d78aec4d7571cd8e09bd36b55fcd6aa0a7 Mon Sep 17 00:00:00 2001
From: englefly 
Date: Sun, 11 Jan 2026 01:34:13 +0800
Subject: [PATCH 13/21] =?UTF-8?q?q5=20=E4=B8=A4=E4=B8=AAsum=EF=BC=880?=
 =?UTF-8?q?=EF=BC=89=E9=94=99=E8=AF=AF=E5=8E=BB=E9=87=8D=E4=BA=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../eageraggregation/EagerAggRewriter.java    | 85 ++++++++++++++++++-
 .../eageraggregation/PushDownAggregation.java |  5 ++
 .../doris/nereids/util/ExpressionUtils.java   |  4 +-
 3 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
index 32c5ac707a9444..1c640f929c945a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
@@ -34,7 +34,9 @@
 import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
 import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
 import org.apache.doris.nereids.trees.plans.logical.LogicalRelation;
+import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
 import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
+import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.qe.SessionVariable;
 import org.apache.doris.statistics.ColumnStatistic;
@@ -245,6 +247,85 @@ private boolean canPushThroughProject(LogicalProject project, Pu
         return true;
     }
 
+    /**
+     * Pushes the aggregation context into every union child and rebuilds the union outputs;
+     * the union is returned unchanged if it has constant rows, non-slot outputs, or no child changed.
+     */
+    @Override
+    public Plan visitLogicalUnion(LogicalUnion union, PushDownAggContext context) {
+        if (!union.getConstantExprsList().isEmpty()
+                || !union.getOutputs().stream().allMatch(e -> e instanceof SlotReference)) {
+            return union;
+        }
+        List<Plan> newChildren = Lists.newArrayList();
+        List<PushDownAggContext> childrenContext = new ArrayList<>();
+        boolean changed = false;
+        for (int idx = 0; idx < union.children().size(); idx++) {
+            Plan child = union.children().get(idx);
+            final int childIdx = idx;
+            List<AggregateFunction> aggFunctionsForChild = new ArrayList<>();
+            Map<AggregateFunction, Alias> aliasMapForChild = new HashMap<>();
+            for (AggregateFunction func : context.getAggFunctions()) {
+                AggregateFunction newFunc = (AggregateFunction) union.pushDownExpressionPastSetOperator(func, childIdx);
+                aggFunctionsForChild.add(newFunc);
+                Alias alias = context.getAliasMap().get(func);
+                // aliasForChild should have its own ExprId
+                Alias aliasForChild = new Alias(newFunc, alias.getName(), alias.getQualifier());
+                aliasMapForChild.put(newFunc, aliasForChild);
+            }
+            List<SlotReference> groupKeysForChild = context.getGroupKeys().stream()
+                    .map(slot -> (SlotReference) union.pushDownExpressionPastSetOperator(slot, childIdx))
+                    .collect(Collectors.toList());
+            PushDownAggContext contextForChild = new PushDownAggContext(aggFunctionsForChild, groupKeysForChild,
+                    aliasMapForChild, context.getCascadesContext(), context.isPassThroughBigJoin());
+            childrenContext.add(contextForChild);
+            Plan newChild = child.accept(this, contextForChild);
+            changed |= newChild != child;
+            newChildren.add(newChild);
+        }
+        if (!changed) {
+            return union;
+        }
+        List<List<SlotReference>> newRegularChildrenOutputs = Lists.newArrayList();
+        for (int i = 0; i < newChildren.size(); i++) {
+            List<SlotReference> childOutput = new ArrayList<>();
+            for (SlotReference slot : union.getRegularChildOutput(i)) {
+                boolean found = false;
+                for (Slot c : newChildren.get(i).getOutput()) {
+                    if (slot.equals(c)) {
+                        childOutput.add((SlotReference) c);
+                        // record the hit, otherwise the alias fallback below adds a second entry
+                        found = true;
+                        break;
+                    }
+                }
+                if (!found) {
+                    // slot is not output by the new child: use the slot of the alias whose function reads it
+                    for (AggregateFunction func : childrenContext.get(i).getAliasMap().keySet()) {
+                        if (func.getInputSlots().contains(slot)) {
+                            childOutput.add((SlotReference) childrenContext.get(i).getAliasMap().get(func).toSlot());
+                        }
+                    }
+                }
+            }
+            newRegularChildrenOutputs.add(childOutput);
+        }
+        List<NamedExpression> newOutputs = new ArrayList<>();
+        for (int i = 0; i < union.getOutput().size(); i++) {
+            SlotReference originSlot = (SlotReference) union.getOutput().get(i);
+            for (AggregateFunction func : context.getAliasMap().keySet()) {
+                if (func.getInputSlots().contains(originSlot)) {
+                    originSlot = (SlotReference) context.getAliasMap().get(func).toSlot();
+                }
+            }
+            DataType dataType = newRegularChildrenOutputs.get(0).get(i).getDataType();
+            newOutputs.add(originSlot.withNullableAndDataType(originSlot.nullable(), dataType));
+        }
+        LogicalUnion newUnion = (LogicalUnion) union.withChildrenAndOutputs(newChildren, newOutputs, newRegularChildrenOutputs);
+        checker.checkTreeAllSlotReferenceFromChildren(newUnion);
+        return newUnion;
+    }
+
     @Override
     public Plan visitLogicalProject(LogicalProject project, PushDownAggContext context) {
         if (project.child() instanceof LogicalCatalogRelation
@@ -305,8 +386,10 @@ public Plan visitLogicalProject(LogicalProject project, PushDown
             for (Alias alias : context.getAliasMap().values()) {
                 newProjections.add(alias.toSlot());
             }
+            Set newProjectionSlots = newProjections.stream().map(ne -> (SlotReference) ne.toSlot())
+                    .collect(Collectors.toSet());
             for (SlotReference key : context.getGroupKeys()) {
-                if (!newProjections.contains(key)) {
+                if (!newProjectionSlots.contains(key)) {
                     newProjections.add(key);
                 }
             }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
index e31c8c2e6ce01f..20d4e17c6b57d4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
@@ -35,6 +35,7 @@
 package org.apache.doris.nereids.rules.rewrite.eageraggregation;
 
 import org.apache.doris.nereids.jobs.JobContext;
+import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
 import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
 import org.apache.doris.nereids.rules.rewrite.AdjustNullable;
 import org.apache.doris.nereids.trees.expressions.Expression;
@@ -54,6 +55,7 @@
 import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
 import org.apache.doris.nereids.trees.plans.logical.LogicalRelation;
+import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
 import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter;
 import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
 import org.apache.doris.nereids.util.ExpressionUtils;
@@ -85,11 +87,13 @@ public class PushDownAggregation extends DefaultPlanRewriter impleme
             Min.class);
 
     private final Set acceptNodeType = Sets.newHashSet(
+            LogicalUnion.class,
             LogicalProject.class,
             LogicalFilter.class,
             LogicalRelation.class,
             LogicalJoin.class);
 
+    private CheckAfterRewrite checker = new CheckAfterRewrite();
     @Override
     public Plan rewriteRoot(Plan plan, JobContext jobContext) {
         int mode = SessionVariable.getEagerAggregationMode();
@@ -214,6 +218,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                 }
                 LogicalAggregate eagerAgg =
                         agg.withAggOutputChild(newOutputExpressions, child);
+                checker.checkTreeAllSlotReferenceFromChildren(eagerAgg);
                 NormalizeAggregate normalizeAggregate = new NormalizeAggregate();
                 LogicalPlan normalized = normalizeAggregate.normalizeAgg(eagerAgg, Optional.empty(),
                         context.getCascadesContext());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
index 3feeb3a729f0d1..8afd56b70ad0df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
@@ -232,9 +232,7 @@ public static Expression rebuildSignature(Expression expr) {
                     if (expr instanceof BoundFunction) {
                         BoundFunction fn = (BoundFunction) expr;
                         BoundFunction rebuilt = (BoundFunction) fn.withChildren(newChildren);
-                        if (!fn.getDataType().equals(rebuilt.getDataType())) {
-                            return new Cast(rebuilt, fn.getDataType());
-                        }
+                        rebuilt = (BoundFunction) TypeCoercionUtils.processBoundFunction(rebuilt);
                         return rebuilt;
                     }
 

From 026bfec1c01194dc27f474ce74604b5357d8bf3a Mon Sep 17 00:00:00 2001
From: englefly 
Date: Tue, 13 Jan 2026 11:33:33 +0800
Subject: [PATCH 14/21] =?UTF-8?q?1.=20sum-if=20=E4=B8=8D=E8=80=83=E8=99=91?=
 =?UTF-8?q?=E7=A9=BF=E8=BF=87bigJoin=EF=BC=8C=202.=20=E6=94=AF=E6=8C=81uni?=
 =?UTF-8?q?on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../eageraggregation/EagerAggRewriter.java    | 177 ++++++++----------
 .../eageraggregation/PushDownAggContext.java  |  31 ++-
 .../eageraggregation/PushDownAggregation.java |  39 +---
 .../trees/plans/logical/LogicalFileScan.java  |   2 +-
 .../trees/plans/logical/LogicalOlapScan.java  |   2 +-
 .../plans/logical/LogicalSetOperation.java    |  23 +++
 .../trees/plans/logical/LogicalUnion.java     |   2 +-
 .../org/apache/doris/qe/SessionVariable.java  |   6 +
 .../org/apache/doris/qe/StmtExecutor.java     |   1 +
 .../data/nereids_p0/eager_agg/eager_agg.out   |  31 ++-
 .../shape_check/tpcds_sf1000/shape/query2.out |  22 +--
 .../tpcds_sf1000_nopkfk/shape/query2.out      |  22 +--
 .../nereids_p0/eager_agg/eager_agg.groovy     |   2 +-
 13 files changed, 176 insertions(+), 184 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
index 1c640f929c945a..9d4f44b2006c9c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
@@ -17,11 +17,12 @@
 
 package org.apache.doris.nereids.rules.rewrite.eageraggregation;
 
-import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
+//import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
 import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
 import org.apache.doris.nereids.rules.rewrite.StatsDerive;
 import org.apache.doris.nereids.stats.ExpressionEstimation;
 import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Cast;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
 import org.apache.doris.nereids.trees.expressions.Slot;
@@ -37,7 +38,6 @@
 import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
 import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
 import org.apache.doris.nereids.types.DataType;
-import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.qe.SessionVariable;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.Statistics;
@@ -45,11 +45,10 @@
 import com.google.common.collect.Lists;
 
 import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Set;
 import java.util.stream.Collectors;
 
 /**
@@ -71,7 +70,7 @@ public class EagerAggRewriter extends DefaultPlanRewriter {
     private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000;
     private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100;
     private final StatsDerive derive = new StatsDerive(false);
-    private CheckAfterRewrite checker = new CheckAfterRewrite();
+    //private CheckAfterRewrite checker = new CheckAfterRewrite();
 
     @Override
     public Plan visitLogicalJoin(LogicalJoin join, PushDownAggContext context) {
@@ -149,14 +148,14 @@ public Plan visitLogicalJoin(LogicalJoin join, P
         }
         if (toLeft) {
             Plan newLeft = join.left().accept(this, childContext);
-            checker.checkTreeAllSlotReferenceFromChildren(newLeft);
+            //checker.checkTreeAllSlotReferenceFromChildren(newLeft);
             if (newLeft != join.left()) {
                 return join.withChildren(newLeft, join.right());
             }
         } else {
             Plan newRight = join.right().accept(this, childContext);
             if (newRight != join.right()) {
-                checker.checkTreeAllSlotReferenceFromChildren(newRight);
+                //checker.checkTreeAllSlotReferenceFromChildren(newRight);
                 return join.withChildren(join.left(), newRight);
             }
         }
@@ -185,13 +184,6 @@ private List getJoinConditionsInputSlotsFromOneSide(LogicalJoin project,
             PushDownAggContext context) {
-        HashMap replaceMapAlias = new HashMap<>();
-        for (NamedExpression ne : project.getProjects()) {
-            if (ne instanceof Alias) {
-                replaceMapAlias.put(ne.toSlot(), ne);
-            }
-        }
-
         /*
          * context: sum(a) groupBy(y+z as x, l)
          * proj: b+c as a, u+v as y, m+n as l
@@ -206,7 +198,7 @@ private PushDownAggContext createContextFromProject(LogicalProject aggFunctions = new ArrayList<>();
-        Map aliasMap = new HashMap<>();
+        Map aliasMap = new IdentityHashMap<>();
         for (AggregateFunction aggFunc : context.getAggFunctions()) {
             AggregateFunction newAggFunc = (AggregateFunction) project.pushDownExpressionPastProject(aggFunc);
             Alias alias = context.getAliasMap().get(aggFunc);
@@ -247,6 +239,30 @@ private boolean canPushThroughProject(LogicalProject project, Pu
         return true;
     }
 
+    /**
+     * Casts each output column of a union child to the type expected by {@code context}: alias types of
+     * the aggregate functions first, then group key types. Returns {@code child} itself if nothing differs.
+     */
+    private Plan alignUnionChildrenDataType(Plan child, PushDownAggContext context) {
+        int outputSize = child.getOutput().size();
+        List<DataType> expectedTypes = Lists.newArrayListWithExpectedSize(outputSize);
+        context.getAggFunctions().forEach(f -> expectedTypes.add(context.getAliasMap().get(f).getDataType()));
+        context.getGroupKeys().forEach(key -> expectedTypes.add(key.getDataType()));
+        List<NamedExpression> projection = Lists.newArrayListWithExpectedSize(outputSize);
+        boolean needProject = false;
+        for (int colIdx = 0; colIdx < outputSize; colIdx++) {
+            SlotReference slot = (SlotReference) child.getOutput().get(colIdx);
+            DataType expected = expectedTypes.get(colIdx);
+            if (slot.getDataType().equals(expected)) {
+                projection.add(slot);
+            } else {
+                projection.add(new Alias(new Cast(slot, expected)));
+                needProject = true;
+            }
+        }
+        return needProject ? new LogicalProject<>(projection, child) : child;
+    }
+
     @Override
     public Plan visitLogicalUnion(LogicalUnion union, PushDownAggContext context) {
         if (!union.getConstantExprsList().isEmpty()) {
@@ -263,7 +279,7 @@ public Plan visitLogicalUnion(LogicalUnion union, PushDownAggContext context) {
             Plan child = union.children().get(idx);
             final int childIdx = idx;
             List aggFunctionsForChild = new ArrayList<>();
-            Map aliasMapForChild = new HashMap<>();
+            IdentityHashMap aliasMapForChild = new IdentityHashMap<>();
             for (AggregateFunction func : context.getAggFunctions()) {
                 AggregateFunction newFunc = (AggregateFunction) union.pushDownExpressionPastSetOperator(func, childIdx);
                 aggFunctionsForChild.add(newFunc);
@@ -283,43 +299,33 @@ public Plan visitLogicalUnion(LogicalUnion union, PushDownAggContext context) {
             if (newChild != child) {
                 changed = true;
             }
+            // every child must have its data types aligned, even if it was not rewritten
+            newChild = alignUnionChildrenDataType(newChild, context);
             newChildren.add(newChild);
         }
         if (changed) {
-            List> newRegularChildrenOutputs = Lists.newArrayList();
-            for (int i = 0; i < newChildren.size(); i++) {
-                List childOutput = new ArrayList<>();
-                for (SlotReference slot : union.getRegularChildOutput(i)) {
-                    boolean found = false;
-                    for (Slot c : newChildren.get(i).getOutput()) {
-                        if (slot.equals(c)) {
-                            childOutput.add((SlotReference) c);
-                            break;
-                        }
-                    }
-                    if (!found) {
-                        for (AggregateFunction func: childrenContext.get(i).getAliasMap().keySet()) {
-                            if (func.getInputSlots().contains(slot)) {
-                                childOutput.add((SlotReference) childrenContext.get(i).getAliasMap().get(func).toSlot());
-                            }
-                        }
-                    }
-                }
-                newRegularChildrenOutputs.add(childOutput);
+            List> newRegularChildrenOutputs = Lists.newArrayListWithExpectedSize(union.arity());
+            for (int childIdx = 0; childIdx < union.arity(); childIdx++) {
+                newRegularChildrenOutputs.add(
+                        newChildren.get(childIdx).getOutput().stream()
+                                .map(s -> (SlotReference) s).collect(Collectors.toList()));
             }
-            List newOutputs = new ArrayList<>();
-            for (int i = 0; i < union.getOutput().size(); i++) {
-                SlotReference originSlot = (SlotReference) union.getOutput().get(i);
-                for (AggregateFunction func: context.getAliasMap().keySet()) {
-                    if (func.getInputSlots().contains(originSlot)) {
-                        originSlot = (SlotReference) context.getAliasMap().get(func).toSlot();
-                    }
+
+            List newOutput = Lists.newArrayList();
+            for (AggregateFunction func : context.getAggFunctions()) {
+                Alias alias = context.getAliasMap().get(func);
+                if (alias == null) {
+                    SessionVariable.throwRuntimeExceptionWhenFeDebug("push down agg failed. union: " + union
+                            + " context: " + context);
+                    return union;
                 }
-                DataType dataType = newRegularChildrenOutputs.get(0).get(i).getDataType();
-                newOutputs.add(originSlot.withNullableAndDataType(originSlot.nullable(), dataType));
+                newOutput.add(alias.toSlot());
             }
-            LogicalUnion newUnion = (LogicalUnion) union.withChildrenAndOutputs(newChildren, newOutputs, newRegularChildrenOutputs);
-            checker.checkTreeAllSlotReferenceFromChildren(newUnion);
+            newOutput.addAll(context.getGroupKeys());
+
+            LogicalUnion newUnion = (LogicalUnion) union
+                    .withChildrenAndOutputs(newChildren, newOutput, newRegularChildrenOutputs);
+            //checker.checkTreeAllSlotReferenceFromChildren(newUnion);
             return newUnion;
         } else {
             return union;
@@ -330,7 +336,7 @@ public Plan visitLogicalUnion(LogicalUnion union, PushDownAggContext context) {
     public Plan visitLogicalProject(LogicalProject project, PushDownAggContext context) {
         if (project.child() instanceof LogicalCatalogRelation
                 || (project.child() instanceof LogicalFilter
-                && project.child().child(0) instanceof LogicalCatalogRelation)) {
+                        && project.child().child(0) instanceof LogicalCatalogRelation)) {
             // project
             //   --> scan
             // =>
@@ -346,7 +352,7 @@ public Plan visitLogicalProject(LogicalProject project, PushDown
 
         PushDownAggContext newContext = createContextFromProject(project, context);
         Plan newChild = project.child().accept(this, newContext);
-        checker.checkTreeAllSlotReferenceFromChildren(newChild);
+        //checker.checkTreeAllSlotReferenceFromChildren(newChild);
         if (newChild != project.child()) {
             /*
              * agg[sum(a), groupBy(b)]
@@ -363,59 +369,33 @@ public Plan visitLogicalProject(LogicalProject project, PushDown
              *                  -> any(a, b1, b2, c)
              *          -> any(d, ...)
              */
-            Set aggFuncInputSlots = context.getAggFunctionsInputSlots();
             List newProjections = new ArrayList<>();
-            for (NamedExpression ne : project.getProjects()) {
-                if (newChild.getOutputSet().containsAll(ne.getInputSlots())) {
-                    newProjections.add(ne);
-                } else {
-                    // if ne is not child output, it means ne is used by aggFunc
-                    // push sum(a) through project(expr as a)
-                    // "sum(expr)" is pushed, and newChild output x, x is alias of sum(expr)
-                    // the new project should output x.
-                    if (!aggFuncInputSlots.contains(ne.toSlot())) {
-                        if (SessionVariable.isFeDebug()) {
-                            throw new RuntimeException("push down Agg failed: " + ne + " is not in project \n"
-                                    + project.treeString());
-                        } else {
-                            return project;
-                        }
-                    }
-                }
-            }
             for (Alias alias : context.getAliasMap().values()) {
                 newProjections.add(alias.toSlot());
             }
-            Set newProjectionSlots = newProjections.stream().map(ne -> (SlotReference) ne.toSlot())
-                    .collect(Collectors.toSet());
-            for (SlotReference key : context.getGroupKeys()) {
-                if (!newProjectionSlots.contains(key)) {
-                    newProjections.add(key);
+            for (SlotReference slot : context.getGroupKeys()) {
+                boolean valid = false;
+                for (NamedExpression ne : project.getProjects()) {
+                    if (ne.toSlot().getExprId().equals(slot.getExprId())) {
+                        valid = true;
+                        newProjections.add(ne);
+                        break;
+                    }
+                }
+                if (!valid) {
+                    SessionVariable.throwRuntimeExceptionWhenFeDebug(
+                            "push agg failed. slot: " + slot + " not found in " + project);
+                    return project;
                 }
             }
-
-            return project.withProjectsAndChild(
-                    newProjections.stream().map(e -> (NamedExpression) replaceBySlots(e, newChild.getOutput()))
-                            .collect(Collectors.toList()),
-                    newChild);
+            LogicalProject result = new LogicalProject(newProjections, newChild);
+            //checker.checkTreeAllSlotReferenceFromChildren(result);
+            return result;
         }
 
         return project;
     }
 
-    private static Expression replaceBySlots(Expression expression, List slots) {
-        Map replaceMap = new HashMap<>();
-        for (Slot slot1 : expression.getInputSlots()) {
-            for (Slot slot2 : slots) {
-                if (slot1.getExprId().asInt() == slot2.getExprId().asInt()) {
-                    replaceMap.put(slot1, slot2);
-                }
-            }
-        }
-        Expression result = ExpressionUtils.replace(expression, replaceMap);
-        return result;
-    }
-
     @Override
     public Plan visitLogicalAggregate(LogicalAggregate agg, PushDownAggContext context) {
         return agg;
@@ -434,7 +414,9 @@ public Plan visitLogicalRelation(LogicalRelation relation, PushDownAggContext co
     private Plan genAggregate(Plan child, PushDownAggContext context) {
         if (checkStats(child, context)) {
             List aggOutputExpressions = new ArrayList<>();
-            aggOutputExpressions.addAll(context.getAliasMap().values());
+            for (AggregateFunction func : context.getAggFunctions()) {
+                aggOutputExpressions.add(context.getAliasMap().get(func));
+            }
             aggOutputExpressions.addAll(context.getGroupKeys());
             LogicalAggregate genAgg = new LogicalAggregate(context.getGroupKeys(), aggOutputExpressions, child);
             NormalizeAggregate normalizeAggregate = new NormalizeAggregate();
@@ -475,7 +457,7 @@ private boolean checkStats(Plan plan, PushDownAggContext context) {
         List medium = Lists.newArrayList();
         List high = Lists.newArrayList();
 
-        List[] cards = new List[] {lower, medium, high};
+        List[] cards = new List[] { lower, medium, high };
 
         for (NamedExpression key : context.getGroupKeys()) {
             ColumnStatistic colStats = ExpressionEstimation.INSTANCE.estimate(key, stats);
@@ -516,20 +498,17 @@ private boolean checkStats(Plan plan, PushDownAggContext context) {
         }
 
         // 3. Extremely low cardinality for lower with at most one medium or high.
-        double lowerCartesianLowerBound =
-                stats.getRowCount() / LOWER_AGGREGATE_EFFECT_COEFFICIENT;
+        double lowerCartesianLowerBound = stats.getRowCount() / LOWER_AGGREGATE_EFFECT_COEFFICIENT;
         if (high.size() + medium.size() == 1 && lower.size() <= 2 && lowerCartesian <= lowerCartesianLowerBound) {
             return true;
-            // StatsCalculator statsCalculator = new StatsCalculator(null);
-            // double estAggRowCount = statsCalculator.estimateGroupByRowCount(context.getGroupKeys(), stats);
-            // return estAggRowCount < lowerCartesianLowerBound;
         }
 
         return false;
     }
 
     // high(2): row_count / cardinality < MEDIUM_AGGREGATE_EFFECT_COEFFICIENT
-    // medium(1): row_count / cardinality >= MEDIUM_AGGREGATE_EFFECT_COEFFICIENT and < LOW_AGGREGATE_EFFECT_COEFFICIENT
+    // medium(1): row_count / cardinality >= MEDIUM_AGGREGATE_EFFECT_COEFFICIENT and
+    // < LOW_AGGREGATE_EFFECT_COEFFICIENT
     // lower(0): row_count / cardinality >= LOW_AGGREGATE_EFFECT_COEFFICIENT
     private int groupByCardinality(ColumnStatistic colStats, double rowCount) {
         if (rowCount == 0 || colStats.ndv * MEDIUM_AGGREGATE_EFFECT_COEFFICIENT > rowCount) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java
index 181a7000e4d371..15e27227561ffa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java
@@ -24,9 +24,9 @@
 import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
 
 import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 
+import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -38,7 +38,7 @@ public class PushDownAggContext {
     public static final int BIG_JOIN_BUILD_SIZE = 400_000;
     private final List aggFunctions;
     private final List groupKeys;
-    private final Map aliasMap;
+    private final IdentityHashMap aliasMap;
     private final Set aggFunctionsInputSlots;
 
     // cascadesContext is used for normalizeAgg
@@ -65,16 +65,21 @@ public PushDownAggContext(List aggFunctions,
         this.aggFunctions = ImmutableList.copyOf(aggFunctions);
         this.cascadesContext = cascadesContext;
 
+        IdentityHashMap builtAliasMap = new IdentityHashMap<>();
         if (aliasMap == null) {
-            ImmutableMap.Builder aliasMapBuilder = ImmutableMap.builder();
             for (AggregateFunction aggFunction : this.aggFunctions) {
-                Alias alias = new Alias(aggFunction, aggFunction.getName());
-                aliasMapBuilder.put(aggFunction, alias);
+                builtAliasMap.put(aggFunction, new Alias(aggFunction, aggFunction.getName()));
             }
-            this.aliasMap = aliasMapBuilder.build();
         } else {
-            this.aliasMap = aliasMap;
+            for (AggregateFunction aggFunction : this.aggFunctions) {
+                Alias alias = aliasMap.get(aggFunction);
+                if (alias == null) {
+                    alias = new Alias(aggFunction, aggFunction.getName());
+                }
+                builtAliasMap.put(aggFunction, alias);
+            }
         }
+        this.aliasMap = builtAliasMap;
 
         this.aggFunctionsInputSlots = aggFunctions.stream()
                 .flatMap(aggFunction -> aggFunction.getInputSlots().stream())
@@ -87,7 +92,7 @@ public PushDownAggContext passThroughBigJoin() {
         return new PushDownAggContext(aggFunctions, groupKeys, aliasMap, cascadesContext, true);
     }
 
-    public Map getAliasMap() {
+    public IdentityHashMap getAliasMap() {
         return aliasMap;
     }
 
@@ -114,4 +119,14 @@ public CascadesContext getCascadesContext() {
     public boolean isPassThroughBigJoin() {
         return passThroughBigJoin;
     }
+
+    /** Renders the aggregate functions, group keys, alias map and passThroughBigJoin flag as text. */
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder("PushDownAggContext{");
+        text.append("aggFunctions=").append(aggFunctions).append(", groupKeys=").append(groupKeys);
+        text.append(", aliasMap=").append(aliasMap);
+        text.append(", passThroughBigJoin=").append(passThroughBigJoin);
+        return text.append('}').toString();
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
index 20d4e17c6b57d4..cea1597b9d5420 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
@@ -35,7 +35,7 @@
 package org.apache.doris.nereids.rules.rewrite.eageraggregation;
 
 import org.apache.doris.nereids.jobs.JobContext;
-import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
+//import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
 import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
 import org.apache.doris.nereids.rules.rewrite.AdjustNullable;
 import org.apache.doris.nereids.trees.expressions.Expression;
@@ -93,7 +93,8 @@ public class PushDownAggregation extends DefaultPlanRewriter impleme
             LogicalRelation.class,
             LogicalJoin.class);
 
-    private CheckAfterRewrite checker = new CheckAfterRewrite();
+    //private CheckAfterRewrite checker = new CheckAfterRewrite();
+
     @Override
     public Plan rewriteRoot(Plan plan, JobContext jobContext) {
         int mode = SessionVariable.getEagerAggregationMode();
@@ -108,7 +109,6 @@ public Plan rewriteRoot(Plan plan, JobContext jobContext) {
     public Plan visitLogicalAggregate(LogicalAggregate agg, JobContext context) {
         Plan newChild = agg.child().accept(this, context);
         if (newChild != agg.child()) {
-            // TODO : push down upper aggregations
             return agg.withChildren(newChild);
         }
 
@@ -131,7 +131,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
         }
 
         List aggFunctions = new ArrayList<>();
-
+        boolean hasSumIf = false;
         for (AggregateFunction aggFunction : agg.getAggregateFunctions()) {
             if (pushDownAggFunctionSet.contains(aggFunction.getClass())
                     && !aggFunction.isDistinct()) {
@@ -143,6 +143,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                     }
                     groupKeys.addAll(body.getCondition().getInputSlots()
                             .stream().map(slot -> (SlotReference) slot).collect(Collectors.toList()));
+                    hasSumIf = true;
                 } else {
                     aggFunctions.add(aggFunction);
                 }
@@ -161,7 +162,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
         }
 
         PushDownAggContext pushDownContext = new PushDownAggContext(new ArrayList<>(aggFunctions),
-                groupKeys, context.getCascadesContext());
+                groupKeys, null, context.getCascadesContext(), hasSumIf);
         try {
             Plan child = agg.child().accept(writer, pushDownContext);
             if (child != agg.child()) {
@@ -184,32 +185,6 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                 for (NamedExpression ne : agg.getOutputExpressions()) {
                     if (ne instanceof SlotReference) {
                         newOutputExpressions.add(ne);
-                    //} else if (ne instanceof Alias
-                    //        && ne.child(0) instanceof Sum
-                    //        && ne.child(0).child(0) instanceof If
-                    //        && ne.child(0).child(0).child(1) instanceof SlotReference) {
-                    //    SlotReference targetSlot = (SlotReference) ne.child(0).child(0).child(1);
-                    //    Slot toReplace = null;
-                    //    for (Slot slot : child.getOutput()) {
-                    //        if (slot.getExprId().equals(targetSlot.getExprId())) {
-                    //            toReplace = slot;
-                    //        }
-                    //    }
-                    //    if (toReplace != null) {
-                    //        Alias newOutput = (Alias) ((Alias) ne).withChildren(
-                    //                new Sum(
-                    //                        new If(
-                    //                                ne.child(0).child(0).child(0),
-                    //                                toReplace,
-                    //                                new NullLiteral(toReplace.getDataType())
-                    //                        )
-                    //                )
-                    //        );
-                    //        newOutputExpressions.add(newOutput);
-                    //    } else {
-                    //        return agg;
-                    //    }
-
                     } else {
                         NamedExpression replaceAliasExpr = (NamedExpression) ExpressionUtils.replace(ne, replaceMap);
                         replaceAliasExpr = (NamedExpression) ExpressionUtils.rebuildSignature(replaceAliasExpr);
@@ -218,7 +193,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                 }
                 LogicalAggregate eagerAgg =
                         agg.withAggOutputChild(newOutputExpressions, child);
-                checker.checkTreeAllSlotReferenceFromChildren(eagerAgg);
+                //checker.checkTreeAllSlotReferenceFromChildren(eagerAgg);
                 NormalizeAggregate normalizeAggregate = new NormalizeAggregate();
                 LogicalPlan normalized = normalizeAggregate.normalizeAgg(eagerAgg, Optional.empty(),
                         context.getCascadesContext());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java
index 5d4c1167a82bc2..c249ca7976772d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java
@@ -126,7 +126,7 @@ public ExternalTable getTable() {
 
     @Override
     public String toString() {
-        return Utils.toSqlStringSkipNull("LogicalFileScan",
+        return Utils.toSqlStringSkipNull("LogicalFileScan[" + id.asInt() + "]",
                 "qualified", qualifiedName(),
                 "alias", tableAlias,
                 "output", getOutput(),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
index a334b8ddcc5cc6..1869e5e44b0d9d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
@@ -295,7 +295,7 @@ public OlapTable getTable() {
 
     @Override
     public String toString() {
-        return Utils.toSqlStringSkipNull("LogicalOlapScan",
+        return Utils.toSqlStringSkipNull("LogicalOlapScan[" + id.asInt() + "]",
                 "qualified", qualifiedName(),
                 "alias", tableAlias,
                 "indexName", getSelectedMaterializedIndexName().orElse(""),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java
index 69447253cbaa18..161490e72a593c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java
@@ -77,12 +77,35 @@ public LogicalSetOperation(PlanType planType, Qualifier qualifier, List ch
         this.regularChildrenOutputs = ImmutableList.of();
     }
 
+    /**
+     * constructor
+     */
     public LogicalSetOperation(PlanType planType, Qualifier qualifier,
             List outputs, List> regularChildrenOutputs, List children) {
         super(planType, children);
         this.qualifier = qualifier;
         this.outputs = ImmutableList.copyOf(outputs);
         this.regularChildrenOutputs = ImmutableList.copyOf(regularChildrenOutputs);
+        // for (List regular : regularChildrenOutputs) {
+        //     if (outputs.size() != regular.size()) {
+        //         System.out.println("eeeee");
+        //     }
+        // }
+        // for (int childIdx = 0; childIdx < children().size(); childIdx++) {
+        //     if (regularChildrenOutputs.isEmpty()) {
+        //         continue;
+        //     }
+        //     List regularList = regularChildrenOutputs.get(childIdx);
+        //     for (int colIdx = 0; colIdx < outputs.size(); colIdx++) {
+        //         SlotReference regularSlot = regularList.get(colIdx);
+        //         Plan currChild = children.get(childIdx);
+        //         boolean contains = currChild.getOutputExprIds().contains(regularSlot.getExprId());
+        //         if (!contains) {
+        //             System.out.println("errrr");
+        //         }
+        //     }
+        // }
+
     }
 
     public LogicalSetOperation(PlanType planType, Qualifier qualifier, List outputs,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java
index 7dc1507cc7a24b..55ffc5a286a820 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java
@@ -114,7 +114,7 @@ public List getExpressions() {
 
     @Override
     public String toString() {
-        return Utils.toSqlStringSkipNull("LogicalUnion",
+        return Utils.toSqlStringSkipNull("LogicalUnion[" + id.asInt() + "]",
                 "qualifier", qualifier,
                 "outputs", outputs,
                 "regularChildrenOutputs", regularChildrenOutputs,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 3a7fccc8380ea1..5b8ee8e9eb1b6b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -5936,6 +5936,12 @@ public static boolean isFeDebug() {
         }
     }
 
+    public static void throwRuntimeExceptionWhenFeDebug(String msg) {
+        if (isFeDebug()) {
+            throw new RuntimeException(msg);
+        }
+    }
+
     public Map getAffectQueryResultInPlanVariables() {
         ImmutableMap.Builder builder = ImmutableMap.builder();
         try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index fc409fefebf40d..cdceb10f87bff4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -775,6 +775,7 @@ private void executeByNereids(TUniqueId queryId) throws Exception {
                     LOG.debug("Command({}) process failed.", originStmt.originStmt, e);
                 }
                 context.getState().setError(ErrorCode.ERR_UNKNOWN_ERROR, e.getMessage());
+                e.printStackTrace();
                 throw new NereidsException("Command (" + originStmt.originStmt + ") process failed.",
                         new AnalysisException(e.getMessage() == null ? e.toString() : e.getMessage(), e));
             }
diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
index c3a9ec7b1c5bc7..c1d7ed845a0749 100644
--- a/regression-test/data/nereids_p0/eager_agg/eager_agg.out
+++ b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
@@ -155,23 +155,20 @@ SyntaxError:
 -- !groupkey_push_exe --
 
 -- !sum_if_push --
-cost = 28.726000000000003
-PhysicalResultSink[586] ( outputExprs=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] )
-+--PhysicalProject[582]@12 ( stats=1, projects=[d_week_seq#60, mon_sales#84, tue_sales#85, wed_sales#86, thu_sales#87, fri_sales#88, sat_sales#89] )
-   +--PhysicalHashAggregate[578]@11 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[d_week_seq#60, ws_item_sk#3], outputExpr=[d_week_seq#60, ws_item_sk#3, sum(if((d_day_name#70 = 'Monday'), sum(ws_sales_price)#21, NULL)) AS `mon_sales`#84, sum(if((d_day_name#70 = 'Tuesday'), sum(ws_sales_price)#21, NULL)) AS `tue_sales`#85, sum(if((d_day_name#70 = 'Wednesday'), sum(ws_sales_price)#21, NULL)) AS `wed_sales`#86, sum(if((d_day_name#70 = 'Thursday'), sum(ws_sales_price)#21, NULL)) AS `thu_sales`#87, sum(if((d_day_name#70 = 'Friday'), sum(ws_sales_price)#21, NULL)) AS `fri_sales`#88, sum(if((d_day_name#70 = 'Saturday'), sum(ws_sales_price)#21, NULL)) AS `sat_sales`#89], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false )
-      +--PhysicalProject[574]@10 ( stats=2, projects=[d_week_seq#60, ws_item_sk#3, sum(ws_sales_price)#21, d_day_name#70] )
-         +--PhysicalHashJoin[570]@9 ( stats=2, type=INNER_JOIN, hashCondition=[(d_date_sk#56 = ws_sold_date_sk#0)], otherCondition=[], markCondition=[] )
-            |--PhysicalProject[557]@6 ( stats=2, projects=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price)#21] )
-            |  +--PhysicalHashJoin[553]@5 ( stats=2, type=INNER_JOIN, hashCondition=[(ws_item_sk#3 = i_item_sk#34)], otherCondition=[], markCondition=[] )
-            |     |--PhysicalProject[536]@4 ( stats=2, projects=[i_item_sk#34] )
-            |     |  +--PhysicalOlapScan[item]@3 ( stats=2, operativeSlots=[i_item_sk#34], virtualColumns=[] )
-            |     +--PhysicalDistribute[549]@2 ( stats=1, distributionSpec=DistributionSpecReplicated )
-            |        +--PhysicalHashAggregate[545]@2 ( stats=1, aggPhase=GLOBAL, aggMode=INPUT_TO_RESULT, maybeUseStreaming=false, groupByExpr=[ws_sold_date_sk#0, ws_item_sk#3], outputExpr=[ws_sold_date_sk#0, ws_item_sk#3, sum(ws_sales_price#21) AS `sum(ws_sales_price)`#21], partitionExpr=Optional.empty, topnFilter=false, topnPushDown=false )
-            |           +--PhysicalProject[541]@1 ( stats=1, projects=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21] )
-            |              +--PhysicalOlapScan[web_sales]@0 ( stats=1, operativeSlots=[ws_sold_date_sk#0, ws_item_sk#3, ws_sales_price#21], virtualColumns=[] )
-            +--PhysicalDistribute[566]@8 ( stats=1, distributionSpec=DistributionSpecReplicated )
-               +--PhysicalProject[562]@8 ( stats=1, projects=[d_date_sk#56, d_week_seq#60, d_day_name#70] )
-                  +--PhysicalOlapScan[date_dim]@7 ( stats=1, operativeSlots=[d_date_sk#56, d_week_seq#60, d_day_name#70], virtualColumns=[] )
+PhysicalResultSink
+--PhysicalProject
+----hashAgg[GLOBAL]
+------PhysicalProject
+--------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=()
+----------PhysicalProject
+------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
+--------------PhysicalProject
+----------------PhysicalOlapScan[item]
+--------------hashAgg[GLOBAL]
+----------------PhysicalProject
+------------------PhysicalOlapScan[web_sales]
+----------PhysicalProject
+------------PhysicalOlapScan[date_dim]
 
 -- !sum_if_push --
 1	\N	\N	\N	\N	\N	\N
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out
index 2522a1a9f3f342..41ad24ad066406 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query2.out
@@ -10,18 +10,16 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
 --------------PhysicalProject
 ----------------PhysicalOlapScan[date_dim] apply RFs: RF0
 --------------PhysicalUnion
-----------------PhysicalProject
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecHash]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[web_sales]
-----------------PhysicalProject
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecHash]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_sales]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales]
 --PhysicalResultSink
 ----PhysicalQuickSort[MERGE_SORT]
 ------PhysicalDistribute[DistributionSpecGather]
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out
index 2522a1a9f3f342..41ad24ad066406 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query2.out
@@ -10,18 +10,16 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
 --------------PhysicalProject
 ----------------PhysicalOlapScan[date_dim] apply RFs: RF0
 --------------PhysicalUnion
-----------------PhysicalProject
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecHash]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[web_sales]
-----------------PhysicalProject
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute[DistributionSpecHash]
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_sales]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales]
 --PhysicalResultSink
 ----PhysicalQuickSort[MERGE_SORT]
 ------PhysicalDistribute[DistributionSpecGather]
diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
index 29a859786759cd..b59569ab2656f5 100644
--- a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
+++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
@@ -178,7 +178,7 @@ suite("eager_agg") {
     """
 
     qt_sum_if_push """
-        explain physical plan
+        explain shape plan
         select d_week_seq,
                 sum(case when (d_day_name='Monday') then ws_sales_price else null end) mon_sales,
                 sum(case when (d_day_name='Tuesday') then ws_sales_price else  null end) tue_sales,

From c71df452b50e46b317292339bfac543653a0a92a Mon Sep 17 00:00:00 2001
From: englefly 
Date: Tue, 13 Jan 2026 16:00:26 +0800
Subject: [PATCH 15/21] remove unused code

---
 .../doris/nereids/jobs/executor/Rewriter.java |   2 -
 .../eageraggregation/EagerAggRewriter.java    |  17 +-
 .../eageraggregation/PushDownAggContext.java  |   9 -
 .../eageraggregation/PushDownAggregation.java |  31 +-
 .../PushdownSumIfAggregation.java             | 156 ---------
 .../eageraggregation/SumAggContext.java       |  48 ---
 .../eageraggregation/SumAggWriter.java        | 320 ------------------
 .../plans/logical/LogicalSetOperation.java    |  20 --
 .../doris/nereids/util/ExpressionUtils.java   |  64 ++--
 .../org/apache/doris/qe/StmtExecutor.java     |   1 -
 10 files changed, 39 insertions(+), 629 deletions(-)
 delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java
 delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java
 delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index 356da3576ceb7e..181cb67867b7a9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -171,7 +171,6 @@
 import org.apache.doris.nereids.rules.rewrite.batch.CorrelateApplyToUnCorrelateApply;
 import org.apache.doris.nereids.rules.rewrite.batch.EliminateUselessPlanUnderApply;
 import org.apache.doris.nereids.rules.rewrite.eageraggregation.PushDownAggregation;
-import org.apache.doris.nereids.rules.rewrite.eageraggregation.PushdownSumIfAggregation;
 import org.apache.doris.nereids.trees.plans.algebra.SetOperation;
 import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
 import org.apache.doris.nereids.trees.plans.logical.LogicalApply;
@@ -676,7 +675,6 @@ public class Rewriter extends AbstractBatchJobExecutor {
                         )),
 
                         costBased(custom(RuleType.PUSH_DOWN_DISTINCT_THROUGH_JOIN, PushDownDistinctThroughJoin::new)),
-                        custom(RuleType.PUSH_DOWN_AGG_THROUGH_JOIN, PushdownSumIfAggregation::new),
                         custom(RuleType.PUSH_DOWN_AGG_THROUGH_JOIN, PushDownAggregation::new),
                         topDown(new PushCountIntoUnionAll())
                 ),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
index 9d4f44b2006c9c..32db11e6cfa6e8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.nereids.rules.rewrite.eageraggregation;
 
-//import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
 import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
 import org.apache.doris.nereids.rules.rewrite.StatsDerive;
 import org.apache.doris.nereids.stats.ExpressionEstimation;
@@ -70,7 +69,6 @@ public class EagerAggRewriter extends DefaultPlanRewriter {
     private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000;
     private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100;
     private final StatsDerive derive = new StatsDerive(false);
-    //private CheckAfterRewrite checker = new CheckAfterRewrite();
 
     @Override
     public Plan visitLogicalJoin(LogicalJoin join, PushDownAggContext context) {
@@ -148,21 +146,20 @@ public Plan visitLogicalJoin(LogicalJoin join, P
         }
         if (toLeft) {
             Plan newLeft = join.left().accept(this, childContext);
-            //checker.checkTreeAllSlotReferenceFromChildren(newLeft);
             if (newLeft != join.left()) {
                 return join.withChildren(newLeft, join.right());
             }
         } else {
             Plan newRight = join.right().accept(this, childContext);
             if (newRight != join.right()) {
-                //checker.checkTreeAllSlotReferenceFromChildren(newRight);
                 return join.withChildren(join.left(), newRight);
             }
         }
         return join;
     }
 
-    private List getJoinConditionsInputSlotsFromOneSide(LogicalJoin join,
+    private List getJoinConditionsInputSlotsFromOneSide(
+            LogicalJoin join,
             Plan side) {
         List oneSideSlots = new ArrayList<>();
         for (Expression condition : join.getHashJoinConjuncts()) {
@@ -182,7 +179,8 @@ private List getJoinConditionsInputSlotsFromOneSide(LogicalJoin project,
+    private PushDownAggContext createContextFromProject(
+            LogicalProject project,
             PushDownAggContext context) {
         /*
          * context: sum(a) groupBy(y+z as x, l)
@@ -212,11 +210,15 @@ private PushDownAggContext createContextFromProject(LogicalProject project, PushDownAggContext context) {
         for (SlotReference slot : context.getGroupKeys()) {
             if (!project.getOutputSet().contains(slot)) {
+                SessionVariable.throwRuntimeExceptionWhenFeDebug("eager agg failed: can not find group key("
+                        + slot + ") in " + project);
                 return false;
             }
         }
         for (Slot slot : context.getAggFunctionsInputSlots()) {
             if (!project.getOutputSet().contains(slot)) {
+                SessionVariable.throwRuntimeExceptionWhenFeDebug("eager agg failed: can not find aggFunc slot("
+                        + slot + ") in " + project);
                 return false;
             }
         }
@@ -325,7 +327,6 @@ public Plan visitLogicalUnion(LogicalUnion union, PushDownAggContext context) {
 
             LogicalUnion newUnion = (LogicalUnion) union
                     .withChildrenAndOutputs(newChildren, newOutput, newRegularChildrenOutputs);
-            //checker.checkTreeAllSlotReferenceFromChildren(newUnion);
             return newUnion;
         } else {
             return union;
@@ -352,7 +353,6 @@ public Plan visitLogicalProject(LogicalProject project, PushDown
 
         PushDownAggContext newContext = createContextFromProject(project, context);
         Plan newChild = project.child().accept(this, newContext);
-        //checker.checkTreeAllSlotReferenceFromChildren(newChild);
         if (newChild != project.child()) {
             /*
              * agg[sum(a), groupBy(b)]
@@ -389,7 +389,6 @@ public Plan visitLogicalProject(LogicalProject project, PushDown
                 }
             }
             LogicalProject result = new LogicalProject(newProjections, newChild);
-            //checker.checkTreeAllSlotReferenceFromChildren(result);
             return result;
         }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java
index 15e27227561ffa..e120ddba4c6e17 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggContext.java
@@ -46,15 +46,6 @@ public class PushDownAggContext {
 
     private final boolean passThroughBigJoin;
 
-    /**
-     * constructor
-     */
-    public PushDownAggContext(List aggFunctions,
-            List groupKeys,
-            CascadesContext cascadesContext) {
-        this(aggFunctions, groupKeys, null, cascadesContext, false);
-    }
-
     /**
      * constructor
      */
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
index cea1597b9d5420..d8ff3ce5c3e510 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
@@ -35,7 +35,6 @@
 package org.apache.doris.nereids.rules.rewrite.eageraggregation;
 
 import org.apache.doris.nereids.jobs.JobContext;
-//import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
 import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
 import org.apache.doris.nereids.rules.rewrite.AdjustNullable;
 import org.apache.doris.nereids.trees.expressions.Expression;
@@ -93,8 +92,6 @@ public class PushDownAggregation extends DefaultPlanRewriter impleme
             LogicalRelation.class,
             LogicalJoin.class);
 
-    //private CheckAfterRewrite checker = new CheckAfterRewrite();
-
     @Override
     public Plan rewriteRoot(Plan plan, JobContext jobContext) {
         int mode = SessionVariable.getEagerAggregationMode();
@@ -121,12 +118,10 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
             if (groupKey instanceof SlotReference) {
                 groupKeys.add((SlotReference) groupKey);
             } else {
-                if (SessionVariable.isFeDebug()) {
-                    throw new RuntimeException("PushDownAggregation failed: agg is not normalized\n "
-                            + agg.treeString());
-                } else {
-                    return agg;
-                }
+                SessionVariable.throwRuntimeExceptionWhenFeDebug(
+                        "PushDownAggregation failed: agg is not normalized\n "
+                        + agg.treeString());
+                return agg;
             }
         }
 
@@ -157,10 +152,6 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
             return agg;
         }
 
-        if (!checkSubTreePattern(agg.child())) {
-            return agg;
-        }
-
         PushDownAggContext pushDownContext = new PushDownAggContext(new ArrayList<>(aggFunctions),
                 groupKeys, null, context.getCascadesContext(), hasSumIf);
         try {
@@ -193,7 +184,6 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                 }
                 LogicalAggregate eagerAgg =
                         agg.withAggOutputChild(newOutputExpressions, child);
-                //checker.checkTreeAllSlotReferenceFromChildren(eagerAgg);
                 NormalizeAggregate normalizeAggregate = new NormalizeAggregate();
                 LogicalPlan normalized = normalizeAggregate.normalizeAgg(eagerAgg, Optional.empty(),
                         context.getCascadesContext());
@@ -201,17 +191,16 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                 return adjustNullable.rewriteRoot(normalized, null);
             }
         } catch (RuntimeException e) {
-            LOG.info("PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString());
-            if (SessionVariable.isFeDebug()) {
-                throw e;
-            }
+            String msg = "PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString();
+            LOG.info(msg);
+            SessionVariable.throwRuntimeExceptionWhenFeDebug(msg);
         }
         return agg;
     }
 
     private boolean checkSubTreePattern(Plan root) {
         return containsPushDownJoin(root)
-                && isSPJ(root);
+                && checkPlanNodeType(root);
     }
 
     private boolean containsPushDownJoin(Plan root) {
@@ -224,14 +213,14 @@ private boolean containsPushDownJoin(Plan root) {
         return root.children().stream().anyMatch(this::containsPushDownJoin);
     }
 
-    private boolean isSPJ(Plan root) {
+    private boolean checkPlanNodeType(Plan root) {
         boolean accepted = acceptNodeType.stream()
                 .anyMatch(clazz -> clazz.isAssignableFrom(root.getClass()));
         if (!accepted) {
             return false;
         }
         for (Plan child : root.children()) {
-            if (!isSPJ(child)) {
+            if (!checkPlanNodeType(child)) {
                 return false;
             }
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java
deleted file mode 100644
index 8f2f127df80a58..00000000000000
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushdownSumIfAggregation.java
+++ /dev/null
@@ -1,156 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.nereids.rules.rewrite.eageraggregation;
-
-import org.apache.doris.nereids.jobs.JobContext;
-import org.apache.doris.nereids.rules.analysis.CheckAfterRewrite;
-import org.apache.doris.nereids.trees.expressions.Alias;
-import org.apache.doris.nereids.trees.expressions.EqualTo;
-import org.apache.doris.nereids.trees.expressions.Expression;
-import org.apache.doris.nereids.trees.expressions.NamedExpression;
-import org.apache.doris.nereids.trees.expressions.Slot;
-import org.apache.doris.nereids.trees.expressions.SlotReference;
-import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
-import org.apache.doris.nereids.trees.expressions.functions.scalar.If;
-import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
-import org.apache.doris.nereids.trees.plans.Plan;
-import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
-import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter;
-import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
-import org.apache.doris.qe.SessionVariable;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * sum(if t1.a then t2.b)
- * tpcds 2 and 59 query can be rewritten
- */
-public class PushdownSumIfAggregation extends DefaultPlanRewriter implements CustomRewriter {
-
-    @Override
-    public Plan rewriteRoot(Plan plan, JobContext jobContext) {
-        return plan;
-        //return plan.accept(this, jobContext);
-    }
-
-    @Override
-    public Plan visitLogicalAggregate(LogicalAggregate agg, JobContext context) {
-        Plan newChild = agg.child().accept(this, context);
-        if (newChild != agg.child()) {
-            // TODO : push down upper aggregations
-            return agg.withChildren(newChild);
-        }
-
-        if (agg.getSourceRepeat().isPresent()) {
-            return agg;
-        }
-
-        List aliasToBePushDown = Lists.newArrayList();
-        List ifConditions = Lists.newArrayList();
-        List ifThenSlots = Lists.newArrayList();
-        boolean patternMatch = true;
-        for (NamedExpression aggOutput : agg.getOutputExpressions()) {
-            if (aggOutput instanceof Alias) {
-                Expression body = aggOutput.child(0);
-                if (body instanceof Sum) {
-                    Expression sumBody = ((Sum) body).child();
-                    if (sumBody instanceof If) {
-                        If ifBody = (If) sumBody;
-                        if (ifBody.child(0) instanceof EqualTo
-                                && ifBody.child(1) instanceof SlotReference
-                                && ifBody.child(2) instanceof NullLiteral) {
-                            ifConditions.add((EqualTo) ifBody.child(0));
-                            ifThenSlots.add((SlotReference) ifBody.child(1));
-                            aliasToBePushDown.add(aggOutput);
-                            continue;
-                        }
-                    }
-                }
-                patternMatch = false;
-            }
-        }
-        if (!patternMatch) {
-            return agg;
-        }
-        if (ifThenSlots.isEmpty()) {
-            return agg;
-        }
-        ifThenSlots = Lists.newArrayList(Sets.newHashSet(ifThenSlots));
-
-        List groupKeys = new ArrayList<>();
-        for (Expression groupKey : agg.getGroupByExpressions()) {
-            if (groupKey instanceof SlotReference) {
-                groupKeys.add((SlotReference) groupKey);
-            } else {
-                if (SessionVariable.isFeDebug()) {
-                    throw new RuntimeException("PushDownAggregation failed: agg is not normalized\n "
-                            + agg.treeString());
-                } else {
-                    return agg;
-                }
-            }
-        }
-
-        SumAggContext sumAggContext = new SumAggContext(aliasToBePushDown, ifConditions, ifThenSlots, groupKeys);
-        SumAggWriter writer = new SumAggWriter();
-        Plan child = agg.child().accept(writer, sumAggContext);
-        CheckAfterRewrite checker = new CheckAfterRewrite();
-        checker.checkTreeAllSlotReferenceFromChildren(child);
-        if (child != agg.child()) {
-            List outputExpressions = agg.getOutputExpressions();
-            List newOutputExpressions = new ArrayList<>();
-            for (NamedExpression output : outputExpressions) {
-                if (output instanceof SlotReference) {
-                    newOutputExpressions.add(output);
-                } else if (output instanceof Alias
-                        && output.child(0) instanceof Sum
-                        && output.child(0).child(0) instanceof If
-                        && output.child(0).child(0).child(1) instanceof SlotReference) {
-                    SlotReference targetSlot = (SlotReference) output.child(0).child(0).child(1);
-                    Slot toReplace = null;
-                    for (Slot slot : child.getOutput()) {
-                        if (slot.getExprId().equals(targetSlot.getExprId())) {
-                            toReplace = slot;
-                        }
-                    }
-                    if (toReplace != null) {
-                        Alias newOutput = (Alias) ((Alias) output).withChildren(
-                                new Sum(
-                                        new If(
-                                                output.child(0).child(0).child(0),
-                                                toReplace,
-                                                new NullLiteral(toReplace.getDataType())
-                                    )
-                            )
-                        );
-                        newOutputExpressions.add(newOutput);
-                    } else {
-                        return agg;
-                    }
-
-                }
-            }
-            return agg.withAggOutputChild(newOutputExpressions, child);
-        }
-        return agg;
-    }
-}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java
deleted file mode 100644
index b868a2177e4bc7..00000000000000
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggContext.java
+++ /dev/null
@@ -1,48 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.nereids.rules.rewrite.eageraggregation;
-
-import org.apache.doris.nereids.trees.expressions.EqualTo;
-import org.apache.doris.nereids.trees.expressions.NamedExpression;
-import org.apache.doris.nereids.trees.expressions.SlotReference;
-
-import com.google.common.collect.ImmutableList;
-
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-/**
- * SumAggContext
- */
-public class SumAggContext {
-    public final List aliasToBePushDown;
-    public final List ifConditions;
-    public final List ifThenSlots;
-    public final List groupKeys;
-
-    public SumAggContext(List aliasToBePushDown,
-            List ifConditions, List ifThenSlots,
-            List groupKeys) {
-        this.aliasToBePushDown = ImmutableList.copyOf(aliasToBePushDown);
-        this.ifConditions = ImmutableList.copyOf(ifConditions);
-        Set distinct = new HashSet<>(ifThenSlots);
-        this.ifThenSlots = ImmutableList.copyOf(distinct);
-        this.groupKeys = ImmutableList.copyOf(groupKeys);
-    }
-}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java
deleted file mode 100644
index fa2c472bbc4183..00000000000000
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/SumAggWriter.java
+++ /dev/null
@@ -1,320 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.nereids.rules.rewrite.eageraggregation;
-
-import org.apache.doris.nereids.rules.rewrite.StatsDerive;
-import org.apache.doris.nereids.stats.ExpressionEstimation;
-import org.apache.doris.nereids.stats.StatsCalculator;
-import org.apache.doris.nereids.trees.expressions.Alias;
-import org.apache.doris.nereids.trees.expressions.Expression;
-import org.apache.doris.nereids.trees.expressions.NamedExpression;
-import org.apache.doris.nereids.trees.expressions.Slot;
-import org.apache.doris.nereids.trees.expressions.SlotReference;
-import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
-import org.apache.doris.nereids.trees.plans.Plan;
-import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
-import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
-import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
-import org.apache.doris.nereids.trees.plans.logical.LogicalRelation;
-import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
-import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
-import org.apache.doris.nereids.types.DataType;
-import org.apache.doris.nereids.util.ExpressionUtils;
-import org.apache.doris.qe.SessionVariable;
-import org.apache.doris.statistics.ColumnStatistic;
-import org.apache.doris.statistics.Statistics;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-/**
- * SumAggWriter
- */
-public class SumAggWriter extends DefaultPlanRewriter {
-    private static final double LOWER_AGGREGATE_EFFECT_COEFFICIENT = 10000;
-    private static final double LOW_AGGREGATE_EFFECT_COEFFICIENT = 1000;
-    private static final double MEDIUM_AGGREGATE_EFFECT_COEFFICIENT = 100;
-    private final StatsDerive derive = new StatsDerive(false);
-
-    @Override
-    public Plan visit(Plan plan, SumAggContext context) {
-        return plan;
-    }
-
-    @Override
-    public Plan visitLogicalProject(LogicalProject project, SumAggContext context) {
-        if (project.getProjects().stream().allMatch(proj -> proj instanceof SlotReference
-                || (proj instanceof Alias && proj.child(0) instanceof SlotReference))) {
-            List slotToPush = new ArrayList<>();
-            for (SlotReference slot : context.ifThenSlots) {
-                slotToPush.add((SlotReference) project.pushDownExpressionPastProject(slot));
-            }
-            List groupBySlots = new ArrayList<>();
-            for (SlotReference slot : context.groupKeys) {
-                groupBySlots.add((SlotReference) project.pushDownExpressionPastProject(slot));
-            }
-            SumAggContext contextForChild = new SumAggContext(
-                    context.aliasToBePushDown,
-                    context.ifConditions,
-                    slotToPush,
-                    groupBySlots);
-            Plan child = project.child().accept(this, contextForChild);
-            if (child != project.child()) {
-                List newProjects = Lists.newArrayList();
-                for (NamedExpression ne : project.getProjects()) {
-                    newProjects.add((NamedExpression) replaceBySlots(ne, child.getOutput()));
-                }
-                return project.withProjects(newProjects).withChildren(child);
-            }
-        }
-        return project;
-    }
-
-    private static Expression replaceBySlots(Expression expression, List slots) {
-        Map replaceMap = new HashMap<>();
-        for (Slot slot1 : expression.getInputSlots()) {
-            for (Slot slot2 : slots) {
-                if (slot1.getExprId().asInt() == slot2.getExprId().asInt()) {
-                    replaceMap.put(slot1, slot2);
-                }
-            }
-        }
-        Expression result = ExpressionUtils.replace(expression, replaceMap);
-        return result;
-    }
-
-    @Override
-    public Plan visitLogicalJoin(LogicalJoin join, SumAggContext context) {
-        Set leftOutput = join.left().getOutputSet();
-        Set conditionSlots = join.getConditionSlot().stream()
-                .map(slot -> (SlotReference) slot).collect(Collectors.toSet());
-        for (Slot slot : context.ifThenSlots) {
-            if (conditionSlots.contains(slot)) {
-                return join;
-            }
-        }
-        Set conditionSlotsFromLeft = Sets.newHashSet(conditionSlots);
-        conditionSlotsFromLeft.retainAll(leftOutput);
-        for (SlotReference slot : context.groupKeys) {
-            if (leftOutput.contains(slot)) {
-                conditionSlotsFromLeft.add(slot);
-            }
-        }
-        if (leftOutput.containsAll(context.ifThenSlots)) {
-            SumAggContext contextForChild = new SumAggContext(
-                    context.aliasToBePushDown,
-                    context.ifConditions,
-                    context.ifThenSlots,
-                    Lists.newArrayList(conditionSlotsFromLeft)
-            );
-            Plan left = join.left().accept(this, contextForChild);
-            if (join.left() != left) {
-                return join.withChildren(left, join.right());
-            }
-        }
-        return join;
-    }
-
-    @Override
-    public Plan visitLogicalUnion(LogicalUnion union, SumAggContext context) {
-        if (!union.getOutputSet().containsAll(context.ifThenSlots)) {
-            return union;
-        }
-        if (!union.getConstantExprsList().isEmpty()) {
-            return union;
-        }
-
-        if (!union.getOutputs().stream().allMatch(e -> e instanceof SlotReference)) {
-            return union;
-        }
-        List newChildren = Lists.newArrayList();
-
-        boolean changed = false;
-        for (int i = 0; i < union.children().size(); i++) {
-            Plan child = union.children().get(i);
-            List ifThenSlotsForChild = new ArrayList<>();
-            // List groupByForChild = new ArrayList<>();
-            for (SlotReference slot : context.ifThenSlots) {
-                Expression pushed = union.pushDownExpressionPastSetOperator(slot, i);
-                if (pushed instanceof SlotReference) {
-                    ifThenSlotsForChild.add((SlotReference) pushed);
-                } else {
-                    return union;
-                }
-            }
-            int childIdx = i;
-            SumAggContext contextForChild = new SumAggContext(
-                    context.aliasToBePushDown,
-                    context.ifConditions,
-                    ifThenSlotsForChild,
-                    context.groupKeys.stream().map(slot
-                            -> (SlotReference) union.pushDownExpressionPastSetOperator(slot, childIdx))
-                            .collect(Collectors.toList())
-                    );
-            Plan newChild = child.accept(this, contextForChild);
-            if (newChild != child) {
-                changed = true;
-            }
-            newChildren.add(newChild);
-        }
-        if (changed) {
-            List> newRegularChildrenOutputs = Lists.newArrayList();
-            for (int i = 0; i < newChildren.size(); i++) {
-                List childOutput = new ArrayList<>();
-                for (SlotReference slot : union.getRegularChildOutput(i)) {
-                    for (Slot c : newChildren.get(i).getOutput()) {
-                        if (slot.equals(c)) {
-                            childOutput.add((SlotReference) c);
-                            break;
-                        }
-                    }
-                }
-                newRegularChildrenOutputs.add(childOutput);
-            }
-            List newOutputs = new ArrayList<>();
-            for (int i = 0; i < union.getOutput().size(); i++) {
-                SlotReference originSlot = (SlotReference) union.getOutput().get(i);
-                DataType dataType = newRegularChildrenOutputs.get(0).get(i).getDataType();
-                newOutputs.add(originSlot.withNullableAndDataType(originSlot.nullable(), dataType));
-            }
-            return union.withChildrenAndOutputs(newChildren, newOutputs, newRegularChildrenOutputs);
-        } else {
-            return union;
-        }
-    }
-
-    @Override
-    public Plan visitLogicalRelation(LogicalRelation relation, SumAggContext context) {
-        return genAggregate(relation, context);
-    }
-
-    private Plan genAggregate(Plan child, SumAggContext context) {
-        if (checkStats(child, context)) {
-            List aggOutputExpressions = new ArrayList<>();
-            for (SlotReference slot : context.ifThenSlots) {
-                Alias alias = new Alias(slot.getExprId(), new Sum(slot));
-                aggOutputExpressions.add(alias);
-            }
-            aggOutputExpressions.addAll(context.groupKeys);
-
-            LogicalAggregate genAgg = new LogicalAggregate(context.groupKeys, aggOutputExpressions, child);
-            return genAgg;
-        } else {
-            return child;
-        }
-
-    }
-
-    private boolean checkStats(Plan plan, SumAggContext context) {
-        int mode = SessionVariable.getEagerAggregationMode();
-        if (mode < 0) {
-            return false;
-        }
-        if (mode > 0) {
-            return true;
-        }
-        Statistics stats = plan.getStats();
-        if (stats == null) {
-            stats = plan.accept(derive, new StatsDerive.DeriveContext());
-        }
-        if (stats.getRowCount() == 0) {
-            return false;
-        }
-
-        List groupKeysStats = new ArrayList<>();
-
-        List lower = Lists.newArrayList();
-        List medium = Lists.newArrayList();
-        List high = Lists.newArrayList();
-
-        List[] cards = new List[] {lower, medium, high};
-
-        for (NamedExpression key : context.groupKeys) {
-            ColumnStatistic colStats = ExpressionEstimation.INSTANCE.estimate(key, stats);
-            if (colStats.isUnKnown) {
-                return false;
-            }
-            groupKeysStats.add(colStats);
-            cards[groupByCardinality(colStats, stats.getRowCount())].add(colStats);
-        }
-
-        double lowerCartesian = 1.0;
-        for (ColumnStatistic colStats : lower) {
-            lowerCartesian = lowerCartesian * colStats.ndv;
-        }
-
-        // pow(row_count/20, a half of lower column size)
-        double lowerUpper = Math.max(stats.getRowCount() / 20, 1);
-        lowerUpper = Math.pow(lowerUpper, Math.max(lower.size() / 2, 1));
-
-        if (high.isEmpty() && (lower.size() + medium.size()) == 1) {
-            return true;
-        }
-
-        if (high.isEmpty() && medium.isEmpty()) {
-            if (lower.size() == 1 && lowerCartesian * 20 <= stats.getRowCount()) {
-                return true;
-            } else if (lower.size() == 2 && lowerCartesian * 7 <= stats.getRowCount()) {
-                return true;
-            } else if (lower.size() <= 3 && lowerCartesian * 20 <= stats.getRowCount() && lowerCartesian < lowerUpper) {
-                return true;
-            } else {
-                return false;
-            }
-        }
-
-        if (high.size() >= 2 || medium.size() > 2 || (high.size() == 1 && !medium.isEmpty())) {
-            return false;
-        }
-
-        // 3. Extremely low cardinality for lower with at most one medium or high.
-        double lowerCartesianLowerBound =
-                stats.getRowCount() / LOWER_AGGREGATE_EFFECT_COEFFICIENT;
-        if (high.size() + medium.size() == 1 && lower.size() <= 2 && lowerCartesian <= lowerCartesianLowerBound) {
-            StatsCalculator statsCalculator = new StatsCalculator(null);
-            double estAggRowCount = statsCalculator.estimateGroupByRowCount(
-                    context.groupKeys.stream().map(s -> (Expression) s).collect(Collectors.toList()),
-                    stats);
-            return estAggRowCount < lowerCartesianLowerBound;
-        }
-
-        return false;
-    }
-
-    // high(2): row_count / cardinality < MEDIUM_AGGREGATE_EFFECT_COEFFICIENT
-    // medium(1): row_count / cardinality >= MEDIUM_AGGREGATE_EFFECT_COEFFICIENT and < LOW_AGGREGATE_EFFECT_COEFFICIENT
-    // lower(0): row_count / cardinality >= LOW_AGGREGATE_EFFECT_COEFFICIENT
-    private int groupByCardinality(ColumnStatistic colStats, double rowCount) {
-        if (rowCount == 0 || colStats.ndv * MEDIUM_AGGREGATE_EFFECT_COEFFICIENT > rowCount) {
-            return 2;
-        } else if (colStats.ndv * MEDIUM_AGGREGATE_EFFECT_COEFFICIENT <= rowCount
-                && colStats.ndv * LOW_AGGREGATE_EFFECT_COEFFICIENT > rowCount) {
-            return 1;
-        } else if (colStats.ndv * LOW_AGGREGATE_EFFECT_COEFFICIENT <= rowCount) {
-            return 0;
-        }
-        return 2;
-    }
-}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java
index 161490e72a593c..edf232d07ece28 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java
@@ -86,26 +86,6 @@ public LogicalSetOperation(PlanType planType, Qualifier qualifier,
         this.qualifier = qualifier;
         this.outputs = ImmutableList.copyOf(outputs);
         this.regularChildrenOutputs = ImmutableList.copyOf(regularChildrenOutputs);
-        // for (List regular : regularChildrenOutputs) {
-        //     if (outputs.size() != regular.size()) {
-        //         System.out.println("eeeee");
-        //     }
-        // }
-        // for (int childIdx = 0; childIdx < children().size(); childIdx++) {
-        //     if (regularChildrenOutputs.isEmpty()) {
-        //         continue;
-        //     }
-        //     List regularList = regularChildrenOutputs.get(childIdx);
-        //     for (int colIdx = 0; colIdx < outputs.size(); colIdx++) {
-        //         SlotReference regularSlot = regularList.get(colIdx);
-        //         Plan currChild = children.get(childIdx);
-        //         boolean contains = currChild.getOutputExprIds().contains(regularSlot.getExprId());
-        //         if (!contains) {
-        //             System.out.println("errrr");
-        //         }
-        //     }
-        // }
-
     }
 
     public LogicalSetOperation(PlanType planType, Qualifier qualifier, List outputs,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
index 8afd56b70ad0df..ab932315348941 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java
@@ -363,8 +363,7 @@ public static List shuttleExpressionWithLineage(List S selectMinimumColumn(Collection sl
     }
 
     /**
-     * Check whether the input expression is a
-     * {@link org.apache.doris.nereids.trees.expressions.Slot}
-     * or at least one {@link Cast} on a
-     * {@link org.apache.doris.nereids.trees.expressions.Slot}
+     * Check whether the input expression is a {@link org.apache.doris.nereids.trees.expressions.Slot}
+     * or at least one {@link Cast} on a {@link org.apache.doris.nereids.trees.expressions.Slot}
      * 

* for example: * - SlotReference to a column: @@ -425,8 +422,7 @@ public static S selectMinimumColumn(Collection sl * cast(cast(int_col as long) as string) * * @param expr input expression - * @return Return Optional[ExprId] of underlying slot reference if input - * expression is a slot or cast on slot. + * @return Return Optional[ExprId] of underlying slot reference if input expression is a slot or cast on slot. * Otherwise, return empty optional result. */ public static Optional isSlotOrCastOnSlot(Expression expr) { @@ -434,10 +430,8 @@ public static Optional isSlotOrCastOnSlot(Expression expr) { } /** - * Check whether the input expression is a - * {@link org.apache.doris.nereids.trees.expressions.Slot} - * or at least one {@link Cast} on a - * {@link org.apache.doris.nereids.trees.expressions.Slot} + * Check whether the input expression is a {@link org.apache.doris.nereids.trees.expressions.Slot} + * or at least one {@link Cast} on a {@link org.apache.doris.nereids.trees.expressions.Slot} */ public static Optional extractSlotOrCastOnSlot(Expression expr) { while (expr instanceof Cast) { @@ -452,8 +446,7 @@ public static Optional extractSlotOrCastOnSlot(Expression expr) { } /** - * Generate replaceMap Slot -> Expression from NamedExpression[Expression as - * name] + * Generate replaceMap Slot -> Expression from NamedExpression[Expression as name] */ public static Map generateReplaceMap(List namedExpressions) { Map replaceMap = Maps.newLinkedHashMapWithExpectedSize(namedExpressions.size()); @@ -481,8 +474,7 @@ public static NamedExpression replaceNameExpression(NamedExpression expr, } /** - * Replace expression node with predicate in the expression tree by `replaceMap` - * in top-down manner. + * Replace expression node with predicate in the expression tree by `replaceMap` in top-down manner. 
*/ public static Expression replaceIf(Expression expr, Map replaceMap, Predicate predicate, boolean stopWhenNotMatched) { @@ -512,11 +504,9 @@ public static List replaceWithCounter(List exprs, } /** - * Replace expression node in the expression tree by `replaceMap` in top-down - * manner. + * Replace expression node in the expression tree by `replaceMap` in top-down manner. * This function gives counter map to record replace count. * For example. - * *

      * input expression: a > 1
      * replaceMap: a -> b + c
@@ -543,10 +533,8 @@ public static Expression replaceWithCounter(Expression expr,
     }
 
     /**
-     * Replace expression node in the expression tree by `replaceMap` in top-down
-     * manner.
+     * Replace expression node in the expression tree by `replaceMap` in top-down manner.
      * For example.
-     *
      * 
      * input expression: a > 1
      * replaceMap: a -> b + c
@@ -563,10 +551,8 @@ public static Expression replace(Expression expr, Map
      * input expression: a > 1
      * replaceMap: d -> b + c, transferMap: a -> d
@@ -620,8 +606,7 @@ public static Expression replaceNullAware(Expression expr,
             Expression replacedExpr = replaceMap.get(e);
             if (replacedExpr == null && e instanceof SlotReference
                     && e.getDataType() instanceof VariantType) {
-                // this is valid, because the variant expression would be extended in expression
-                // rewrite
+                // this is valid, because the variant expression would be extended in expression rewrite
                 return e;
             }
             if (replacedExpr == null && e instanceof NamedExpression) {
@@ -635,8 +620,7 @@ public static Expression replaceNullAware(Expression expr,
     }
 
     /**
-     * Replace expression node in the expression tree by `replaceMap` in top-down
-     * manner.
+     * Replace expression node in the expression tree by `replaceMap` in top-down manner.
      */
     public static List replaceNamedExpressions(List namedExpressions,
             Map replaceMap) {
@@ -656,8 +640,8 @@ public static List replaceNamedExpressions(List e instanceof UniqueFunction ? ((UniqueFunction) e).withIgnoreUniqueId(ignoreUniqueId) : e);
+        return expression.rewriteDownShortCircuit(e ->
+                e instanceof UniqueFunction ? ((UniqueFunction) e).withIgnoreUniqueId(ignoreUniqueId) : e);
     }
 
     public static  List rewriteDownShortCircuit(
@@ -725,12 +709,10 @@ public static boolean hasNullLiteral(List children) {
     public static boolean canInferNotNullForMarkSlot(Expression predicate, ExpressionRewriteContext ctx) {
         /*
          * assume predicate is from LogicalFilter
-         * the idea is replacing each mark join slot with null and false literal then
-         * run FoldConstant rule
+         * the idea is replacing each mark join slot with null and false literal then run FoldConstant rule
          * if the evaluate result are:
          * 1. all true
-         * 2. all null and false (in logicalFilter, we discard both null and false
-         * values)
+         * 2. all null and false (in logicalFilter, we discard both null and false values)
          * the mark slot can be non-nullable boolean
          * and in semi join, we can safely change the mark conjunct to hash conjunct
          */
@@ -823,8 +805,7 @@ public static Set inferNotNull(Set predicates, CascadesC
     }
 
     /**
-     * infer notNulls slot from predicate but these slots must be in the given
-     * slots.
+     * infer notNulls slot from predicate but these slots must be in the given slots.
      */
     public static Set inferNotNull(Set predicates, Set slots,
             CascadesContext cascadesContext) {
@@ -1095,8 +1076,7 @@ public static Expression getSingleNumericSlotOrExpressionCoveredByCast(Expressio
         }
         // for other datatype, only support cast.
         // example: T1 join T2 on subStr(T1.a, 1,4) = subStr(T2.a, 1,4)
-        // the cost of subStr is too high, and hence we do not generate RF subStr(T2.a,
-        // 1,4)->subStr(T1.a, 1,4)
+        // the cost of subStr is too high, and hence we do not generate RF subStr(T2.a, 1,4)->subStr(T1.a, 1,4)
         while (expression instanceof Cast) {
             expression = ((Cast) expression).child();
         }
@@ -1118,8 +1098,7 @@ public static boolean checkSlotConstant(Slot slot, Set predicates) {
     }
 
     /**
-     * Check the expression is inferred or not, if inferred return true, nor return
-     * false
+     * Check the expression is inferred or not, if inferred return true, nor return false
      */
     public static boolean isInferred(Expression expression) {
         return expression.accept(new DefaultExpressionVisitor() {
@@ -1349,8 +1328,7 @@ public static boolean hasNonWindowAggregateFunction(Expression expression) {
     }
 
     /**
-     * check if the expressions contain a unique function which exists multiple
-     * times
+     * check if the expressions contain a unique function which exists multiple times
      */
     public static boolean containUniqueFunctionExistMultiple(Collection expressions) {
         Set counterSet = Sets.newHashSet();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index cdceb10f87bff4..fc409fefebf40d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -775,7 +775,6 @@ private void executeByNereids(TUniqueId queryId) throws Exception {
                     LOG.debug("Command({}) process failed.", originStmt.originStmt, e);
                 }
                 context.getState().setError(ErrorCode.ERR_UNKNOWN_ERROR, e.getMessage());
-                e.printStackTrace();
                 throw new NereidsException("Command (" + originStmt.originStmt + ") process failed.",
                         new AnalysisException(e.getMessage() == null ? e.toString() : e.getMessage(), e));
             }

From e53f8472eda4c27d0088460f8b6c282ff8bab91d Mon Sep 17 00:00:00 2001
From: englefly 
Date: Tue, 13 Jan 2026 16:20:32 +0800
Subject: [PATCH 16/21] update shape

---
 .../tpcds_sf100/rf_prune/query2.out           |  22 +--
 .../tpcds_sf100/rf_prune/query3.out           |  19 +--
 .../tpcds_sf100/rf_prune/query31.out          |  18 ++-
 .../tpcds_sf100/rf_prune/query37.out          |  25 ++--
 .../tpcds_sf100/rf_prune/query38.out          |  27 ++--
 .../tpcds_sf100/rf_prune/query42.out          |   9 +-
 .../tpcds_sf100/rf_prune/query43.out          |   9 +-
 .../tpcds_sf100/rf_prune/query52.out          |   9 +-
 .../tpcds_sf100/rf_prune/query55.out          |   9 +-
 .../tpcds_sf100/rf_prune/query59.out          |   9 +-
 .../tpcds_sf100/rf_prune/query82.out          |  25 ++--
 .../tpcds_sf100/rf_prune/query87.out          |  27 ++--
 .../shape_check/tpcds_sf100/shape/query2.out  |  22 +--
 .../shape_check/tpcds_sf100/shape/query3.out  |  19 +--
 .../shape_check/tpcds_sf100/shape/query31.out |  18 ++-
 .../shape_check/tpcds_sf100/shape/query37.out |  25 ++--
 .../shape_check/tpcds_sf100/shape/query38.out |  27 ++--
 .../shape_check/tpcds_sf100/shape/query42.out |   9 +-
 .../shape_check/tpcds_sf100/shape/query43.out |   9 +-
 .../shape_check/tpcds_sf100/shape/query52.out |   9 +-
 .../shape_check/tpcds_sf100/shape/query55.out |   9 +-
 .../shape_check/tpcds_sf100/shape/query59.out |   9 +-
 .../shape_check/tpcds_sf100/shape/query64.out | 129 +++++++++---------
 .../shape_check/tpcds_sf100/shape/query82.out |  25 ++--
 .../shape_check/tpcds_sf100/shape/query87.out |  27 ++--
 .../shape_check/tpcds_sf1000/hint/query2.out  |  16 ++-
 .../shape_check/tpcds_sf1000/hint/query31.out |  18 ++-
 .../shape_check/tpcds_sf1000/hint/query38.out |  27 ++--
 .../shape_check/tpcds_sf1000/hint/query42.out |   9 +-
 .../shape_check/tpcds_sf1000/hint/query43.out |   9 +-
 .../shape_check/tpcds_sf1000/hint/query52.out |   9 +-
 .../shape_check/tpcds_sf1000/hint/query59.out |   9 +-
 .../shape_check/tpcds_sf1000/hint/query87.out |  27 ++--
 .../tpcds_sf1000/shape/query37.out            |  25 ++--
 .../tpcds_sf1000/shape/query38.out            |  27 ++--
 .../tpcds_sf1000/shape/query82.out            |  25 ++--
 .../tpcds_sf1000/shape/query87.out            |  27 ++--
 .../tpcds_sf1000_nopkfk/shape/query21.out     |   2 +-
 .../tpcds_sf1000_nopkfk/shape/query22.out     |   2 +-
 .../tpcds_sf1000_nopkfk/shape/query39.out     |   2 +-
 .../tpcds_sf1000_nopkfk/shape/query5.out      |  13 +-
 .../tpcds_sf1000_nopkfk/shape/query78.out     |  10 +-
 .../data/shape_check/tpch_sf1000/hint/q10.out |   7 +-
 .../data/shape_check/tpch_sf1000/hint/q12.out |   5 +-
 .../data/shape_check/tpch_sf1000/hint/q19.out |   9 +-
 .../data/shape_check/tpch_sf1000/hint/q3.out  |   7 +-
 .../shape_check/tpch_sf1000/rf_prune/q10.out  |  29 ++--
 .../shape_check/tpch_sf1000/rf_prune/q11.out  |  19 +--
 .../shape_check/tpch_sf1000/rf_prune/q12.out  |   5 +-
 .../shape_check/tpch_sf1000/rf_prune/q14.out  |   8 +-
 .../shape_check/tpch_sf1000/rf_prune/q19.out  |  11 +-
 .../shape_check/tpch_sf1000/rf_prune/q3.out   |   7 +-
 .../shape_check/tpch_sf1000/shape/q10.out     |  29 ++--
 .../shape_check/tpch_sf1000/shape/q11.out     |  19 +--
 .../shape_check/tpch_sf1000/shape/q12.out     |   5 +-
 .../shape_check/tpch_sf1000/shape/q14.out     |   9 +-
 .../shape_check/tpch_sf1000/shape/q19.out     |  11 +-
 .../data/shape_check/tpch_sf1000/shape/q3.out |   7 +-
 58 files changed, 597 insertions(+), 392 deletions(-)

diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query2.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query2.out
index 12167efa7486a4..41ad24ad066406 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query2.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query2.out
@@ -6,16 +6,20 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=()
---------------PhysicalUnion
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------PhysicalOlapScan[web_sales]
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales]
+------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=() build RFs:RF0 sold_date_sk->[d_date_sk]
 --------------PhysicalProject
-----------------PhysicalOlapScan[date_dim]
+----------------PhysicalOlapScan[date_dim] apply RFs: RF0
+--------------PhysicalUnion
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales]
 --PhysicalResultSink
 ----PhysicalQuickSort[MERGE_SORT]
 ------PhysicalDistribute[DistributionSpecGather]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query3.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query3.out
index e9c6ec79c33e7b..fae84ff1a42849 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query3.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query3.out
@@ -9,15 +9,18 @@ PhysicalResultSink
 ------------PhysicalDistribute[DistributionSpecHash]
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
+------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
-------------------------PhysicalProject
---------------------------filter((item.i_manufact_id = 816))
-----------------------------PhysicalOlapScan[item]
+--------------------------filter((dt.d_moy = 11))
+----------------------------PhysicalOlapScan[date_dim(dt)]
 --------------------PhysicalProject
-----------------------filter((dt.d_moy = 11))
-------------------------PhysicalOlapScan[date_dim(dt)]
+----------------------filter((item.i_manufact_id = 816))
+------------------------PhysicalOlapScan[item]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query31.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query31.out
index af4392c42a70b7..89da394c70488e 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query31.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query31.out
@@ -9,9 +9,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------------PhysicalProject
 --------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=()
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[store_sales] apply RFs: RF0
+------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------hashAgg[GLOBAL]
+----------------------PhysicalDistribute[DistributionSpecHash]
+------------------------hashAgg[LOCAL]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
 --------------------PhysicalProject
 ----------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3))
 ------------------------PhysicalOlapScan[date_dim]
@@ -26,9 +29,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=()
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[web_sales] apply RFs: RF2
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2
 ----------------------PhysicalProject
 ------------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query37.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query37.out
index 222831f5333ef6..df27c72da5084f 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query37.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query37.out
@@ -8,20 +8,23 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
---------------------------PhysicalProject
-----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
-------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1
+------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
+--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
 --------------------------PhysicalProject
 ----------------------------filter((item.i_current_price <= 75.00) and (item.i_current_price >= 45.00) and i_manufact_id IN (1000, 707, 747, 856))
 ------------------------------PhysicalOlapScan[item]
-----------------------PhysicalProject
-------------------------filter((date_dim.d_date <= '1999-04-22') and (date_dim.d_date >= '1999-02-21'))
---------------------------PhysicalOlapScan[date_dim]
+------------------PhysicalProject
+--------------------filter((date_dim.d_date <= '1999-04-22') and (date_dim.d_date >= '1999-02-21'))
+----------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out
index 7534ddcc8c2579..7a734f7ef0ce6b 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query38.out
@@ -14,9 +14,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=()
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -28,9 +31,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=()
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -42,9 +48,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=()
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183))
 ----------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query42.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query42.out
index b2a262da46536b..0bb9347e187642 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query42.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query42.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 1))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query43.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query43.out
index 37ab89010ef0a9..38ee41c557e4dd 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query43.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query43.out
@@ -10,9 +10,12 @@ PhysicalResultSink
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------filter((date_dim.d_year = 2000))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query52.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query52.out
index ddcc09c49f974a..0d3bcc7688306c 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query52.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query52.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 1))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query55.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query55.out
index e24470e9606c8b..94b0bfd66b3823 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query55.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query55.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 100))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query59.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query59.out
index 215b62a9180db7..12ffd327e847a8 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query59.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query59.out
@@ -6,9 +6,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=()
---------------PhysicalProject
-----------------PhysicalOlapScan[store_sales]
+------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=()
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[store_sales]
 --------------PhysicalProject
 ----------------PhysicalOlapScan[date_dim]
 --PhysicalResultSink
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query82.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query82.out
index aa01a8a275e52a..8072e7626228cb 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query82.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query82.out
@@ -8,20 +8,23 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[store_sales] apply RFs: RF2
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
---------------------------PhysicalProject
-----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
-------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1
+------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
+--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
 --------------------------PhysicalProject
 ----------------------------filter((item.i_current_price <= 47.00) and (item.i_current_price >= 17.00) and i_manufact_id IN (138, 169, 339, 639))
 ------------------------------PhysicalOlapScan[item]
-----------------------PhysicalProject
-------------------------filter((date_dim.d_date <= '1999-09-07') and (date_dim.d_date >= '1999-07-09'))
---------------------------PhysicalOlapScan[date_dim]
+------------------PhysicalProject
+--------------------filter((date_dim.d_date <= '1999-09-07') and (date_dim.d_date >= '1999-07-09'))
+----------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out
index 32c3855f30b3fa..e0dd3c9c5e9567 100644
--- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out
+++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query87.out
@@ -12,9 +12,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=()
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -26,9 +29,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=()
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -40,9 +46,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=()
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184))
 ------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query2.out b/regression-test/data/shape_check/tpcds_sf100/shape/query2.out
index b9857d349977f8..41ad24ad066406 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query2.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query2.out
@@ -6,16 +6,20 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk,ws_sold_date_sk]
---------------PhysicalUnion
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------PhysicalOlapScan[web_sales] apply RFs: RF0
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
+------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=() build RFs:RF0 sold_date_sk->[d_date_sk]
 --------------PhysicalProject
-----------------PhysicalOlapScan[date_dim]
+----------------PhysicalOlapScan[date_dim] apply RFs: RF0
+--------------PhysicalUnion
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales]
 --PhysicalResultSink
 ----PhysicalQuickSort[MERGE_SORT]
 ------PhysicalDistribute[DistributionSpecGather]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query3.out b/regression-test/data/shape_check/tpcds_sf100/shape/query3.out
index e9c6ec79c33e7b..fae84ff1a42849 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query3.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query3.out
@@ -9,15 +9,18 @@ PhysicalResultSink
 ------------PhysicalDistribute[DistributionSpecHash]
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
+------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
-------------------------PhysicalProject
---------------------------filter((item.i_manufact_id = 816))
-----------------------------PhysicalOlapScan[item]
+--------------------------filter((dt.d_moy = 11))
+----------------------------PhysicalOlapScan[date_dim(dt)]
 --------------------PhysicalProject
-----------------------filter((dt.d_moy = 11))
-------------------------PhysicalOlapScan[date_dim(dt)]
+----------------------filter((item.i_manufact_id = 816))
+------------------------PhysicalOlapScan[item]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query31.out b/regression-test/data/shape_check/tpcds_sf100/shape/query31.out
index d45dad8dca8825..ed4c26b9893795 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query31.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query31.out
@@ -9,9 +9,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------------PhysicalProject
 --------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------hashAgg[GLOBAL]
+----------------------PhysicalDistribute[DistributionSpecHash]
+------------------------hashAgg[LOCAL]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 --------------------PhysicalProject
 ----------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3))
 ------------------------PhysicalOlapScan[date_dim]
@@ -26,9 +29,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3
 ----------------------PhysicalProject
 ------------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query37.out b/regression-test/data/shape_check/tpcds_sf100/shape/query37.out
index 222831f5333ef6..df27c72da5084f 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query37.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query37.out
@@ -8,20 +8,23 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
---------------------------PhysicalProject
-----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
-------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1
+------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
+--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
 --------------------------PhysicalProject
 ----------------------------filter((item.i_current_price <= 75.00) and (item.i_current_price >= 45.00) and i_manufact_id IN (1000, 707, 747, 856))
 ------------------------------PhysicalOlapScan[item]
-----------------------PhysicalProject
-------------------------filter((date_dim.d_date <= '1999-04-22') and (date_dim.d_date >= '1999-02-21'))
---------------------------PhysicalOlapScan[date_dim]
+------------------PhysicalProject
+--------------------filter((date_dim.d_date <= '1999-04-22') and (date_dim.d_date >= '1999-02-21'))
+----------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query38.out b/regression-test/data/shape_check/tpcds_sf100/shape/query38.out
index e55825e7e76457..43ed9bc850024d 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query38.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query38.out
@@ -14,9 +14,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -28,9 +31,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -42,9 +48,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1194) and (date_dim.d_month_seq >= 1183))
 ----------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query42.out b/regression-test/data/shape_check/tpcds_sf100/shape/query42.out
index b2a262da46536b..0bb9347e187642 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query42.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query42.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 1))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query43.out b/regression-test/data/shape_check/tpcds_sf100/shape/query43.out
index 37ab89010ef0a9..38ee41c557e4dd 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query43.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query43.out
@@ -10,9 +10,12 @@ PhysicalResultSink
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------filter((date_dim.d_year = 2000))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query52.out b/regression-test/data/shape_check/tpcds_sf100/shape/query52.out
index ddcc09c49f974a..0d3bcc7688306c 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query52.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query52.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 1))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query55.out b/regression-test/data/shape_check/tpcds_sf100/shape/query55.out
index e24470e9606c8b..94b0bfd66b3823 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query55.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query55.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 100))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query59.out b/regression-test/data/shape_check/tpcds_sf100/shape/query59.out
index f28f49bf99a493..f5fc81e7ddb98c 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query59.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query59.out
@@ -6,9 +6,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------PhysicalProject
-----------------PhysicalOlapScan[store_sales] apply RFs: RF0
+------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[store_sales] apply RFs: RF0
 --------------PhysicalProject
 ----------------PhysicalOlapScan[date_dim]
 --PhysicalResultSink
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query64.out b/regression-test/data/shape_check/tpcds_sf100/shape/query64.out
index 9b91b5a6891b8f..79c6672dc2e533 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query64.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query64.out
@@ -7,85 +7,86 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
 --------PhysicalDistribute[DistributionSpecHash]
 ----------hashAgg[LOCAL]
 ------------PhysicalProject
---------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF19 d_date_sk->[c_first_shipto_date_sk]
+--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF19 i_item_sk->[cr_item_sk,cs_item_sk,sr_item_sk,ss_item_sk]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF18 d_date_sk->[c_first_sales_date_sk]
+------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF18 ib_income_band_sk->[hd_income_band_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF17 ss_customer_sk->[c_customer_sk]
+----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF17 ib_income_band_sk->[hd_income_band_sk]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF16 ca_address_sk->[c_current_addr_sk]
+--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF16 ca_address_sk->[c_current_addr_sk]
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF15 cd_demo_sk->[c_current_cdemo_sk]
+------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF15 ca_address_sk->[ss_addr_sk]
 --------------------------------PhysicalProject
 ----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk]
 ------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[customer] apply RFs: RF14 RF15 RF16 RF17 RF18 RF19
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF13 ib_income_band_sk->[hd_income_band_sk]
+--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[ss_hdemo_sk]
 ----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[household_demographics(hd2)] apply RFs: RF13
+------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF12 p_promo_sk->[ss_promo_sk]
+--------------------------------------------PhysicalProject
+----------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=(( not (cd_marital_status = cd_marital_status))) build RFs:RF11 cd_demo_sk->[c_current_cdemo_sk]
+------------------------------------------------PhysicalProject
+--------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF10 cd_demo_sk->[ss_cdemo_sk]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_shipto_date_sk = d3.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[c_first_shipto_date_sk]
+--------------------------------------------------------PhysicalProject
+----------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF8 d_date_sk->[c_first_sales_date_sk]
+------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF7 c_customer_sk->[ss_customer_sk]
+----------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk]
+--------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF4 sr_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk];RF5 sr_ticket_number->[ss_ticket_number]
+------------------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF3 cs_item_sk->[ss_item_sk]
+----------------------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF5 RF6 RF7 RF10 RF12 RF13 RF15 RF19
+--------------------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------------------filter(d_year IN (2001, 2002))
+------------------------------------------------------------------------------------PhysicalOlapScan[date_dim(d1)]
+----------------------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------------------filter((sale > (2 * refund)))
+--------------------------------------------------------------------------------hashAgg[GLOBAL]
+----------------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------------------------------------------------------------hashAgg[LOCAL]
+--------------------------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number]
+------------------------------------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF4 RF19
+------------------------------------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF4 RF19
+------------------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF19
+--------------------------------------------------------------------PhysicalProject
+----------------------------------------------------------------------PhysicalOlapScan[store]
+----------------------------------------------------------------PhysicalProject
+------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF8 RF9 RF11 RF14 RF16
+------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------PhysicalOlapScan[date_dim(d2)]
+--------------------------------------------------------PhysicalProject
+----------------------------------------------------------PhysicalOlapScan[date_dim(d3)]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[customer_demographics(cd1)]
+------------------------------------------------PhysicalProject
+--------------------------------------------------PhysicalOlapScan[customer_demographics(cd2)]
+--------------------------------------------PhysicalProject
+----------------------------------------------PhysicalOlapScan[promotion]
 ----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[income_band(ib2)]
+------------------------------------------PhysicalOlapScan[household_demographics(hd1)] apply RFs: RF17
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[household_demographics(hd2)] apply RFs: RF18
 --------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[customer_demographics(cd2)]
+----------------------------------PhysicalOlapScan[customer_address(ad1)]
 ----------------------------PhysicalProject
 ------------------------------PhysicalOlapScan[customer_address(ad2)]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF11 ss_item_sk->[sr_item_sk];RF12 ss_ticket_number->[sr_ticket_number]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[store_returns] apply RFs: RF11 RF12
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF10 ss_addr_sk->[ca_address_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[customer_address(ad1)] apply RFs: RF10
---------------------------------PhysicalProject
-----------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store_sales.ss_cdemo_sk = cd1.cd_demo_sk)) otherCondition=() build RFs:RF9 ss_cdemo_sk->[cd_demo_sk]
-------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[customer_demographics(cd1)] apply RFs: RF9
-------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[cr_item_sk,cs_item_sk,ss_item_sk]
---------------------------------------PhysicalProject
-----------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk]
-------------------------------------------PhysicalProject
---------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF6 s_store_sk->[ss_store_sk]
-----------------------------------------------PhysicalProject
-------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF5 ib_income_band_sk->[hd_income_band_sk]
---------------------------------------------------PhysicalProject
-----------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF4 hd_demo_sk->[ss_hdemo_sk]
-------------------------------------------------------PhysicalProject
---------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF3 cs_item_sk->[ss_item_sk]
-----------------------------------------------------------PhysicalProject
-------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 RF4 RF6 RF7 RF8
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------filter(d_year IN (2001, 2002))
-------------------------------------------------------------------PhysicalOlapScan[date_dim(d1)]
-----------------------------------------------------------PhysicalProject
-------------------------------------------------------------filter((sale > (2 * refund)))
---------------------------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------------------------------hashAgg[LOCAL]
---------------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and (catalog_sales.cs_order_number = catalog_returns.cr_order_number)) otherCondition=() build RFs:RF0 cr_item_sk->[cs_item_sk];RF1 cr_order_number->[cs_order_number]
-------------------------------------------------------------------------PhysicalProject
---------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF8
-------------------------------------------------------------------------PhysicalProject
---------------------------------------------------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF8
-------------------------------------------------------PhysicalProject
---------------------------------------------------------PhysicalOlapScan[household_demographics(hd1)] apply RFs: RF5
---------------------------------------------------PhysicalProject
-----------------------------------------------------PhysicalOlapScan[income_band(ib1)]
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[store]
-------------------------------------------PhysicalProject
---------------------------------------------PhysicalOlapScan[promotion]
---------------------------------------PhysicalProject
-----------------------------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium'))
-------------------------------------------PhysicalOlapScan[item]
+--------------------------PhysicalOlapScan[income_band(ib1)]
 --------------------PhysicalProject
-----------------------PhysicalOlapScan[date_dim(d2)]
+----------------------PhysicalOlapScan[income_band(ib2)]
 ----------------PhysicalProject
-------------------PhysicalOlapScan[date_dim(d3)]
+------------------filter((item.i_current_price <= 33.00) and (item.i_current_price >= 24.00) and i_color IN ('blanched', 'brown', 'burlywood', 'chocolate', 'drab', 'medium'))
+--------------------PhysicalOlapScan[item]
 --PhysicalResultSink
 ----PhysicalQuickSort[MERGE_SORT]
 ------PhysicalDistribute[DistributionSpecGather]
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query82.out b/regression-test/data/shape_check/tpcds_sf100/shape/query82.out
index aa01a8a275e52a..8072e7626228cb 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query82.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query82.out
@@ -8,20 +8,23 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[store_sales] apply RFs: RF2
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
---------------------------PhysicalProject
-----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
-------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1
+------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
+--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
 --------------------------PhysicalProject
 ----------------------------filter((item.i_current_price <= 47.00) and (item.i_current_price >= 17.00) and i_manufact_id IN (138, 169, 339, 639))
 ------------------------------PhysicalOlapScan[item]
-----------------------PhysicalProject
-------------------------filter((date_dim.d_date <= '1999-09-07') and (date_dim.d_date >= '1999-07-09'))
---------------------------PhysicalOlapScan[date_dim]
+------------------PhysicalProject
+--------------------filter((date_dim.d_date <= '1999-09-07') and (date_dim.d_date >= '1999-07-09'))
+----------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query87.out b/regression-test/data/shape_check/tpcds_sf100/shape/query87.out
index 247dfbbbc89a90..e34d03d1204c07 100644
--- a/regression-test/data/shape_check/tpcds_sf100/shape/query87.out
+++ b/regression-test/data/shape_check/tpcds_sf100/shape/query87.out
@@ -12,9 +12,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -26,9 +29,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -40,9 +46,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1195) and (date_dim.d_month_seq >= 1184))
 ------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query2.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query2.out
index 197d38b7c71d8f..e9bce4b8168faa 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query2.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query2.out
@@ -8,12 +8,16 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
 ----------PhysicalProject
 ------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = wscs.sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk,ws_sold_date_sk]
 --------------PhysicalUnion
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------PhysicalOlapScan[web_sales] apply RFs: RF0
-----------------PhysicalDistribute[DistributionSpecExecutionAny]
-------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
 --------------PhysicalProject
 ----------------PhysicalOlapScan[date_dim]
 --PhysicalResultSink
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query31.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query31.out
index f4922710b5cc95..4bbfc877903b37 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query31.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query31.out
@@ -9,9 +9,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------------PhysicalProject
 --------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk]
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------PhysicalProject
-----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------hashAgg[GLOBAL]
+----------------------PhysicalDistribute[DistributionSpecHash]
+------------------------hashAgg[LOCAL]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 --------------------PhysicalProject
 ----------------------filter((ss.d_year = 1999) and d_qoy IN (1, 2, 3))
 ------------------------PhysicalOlapScan[date_dim]
@@ -26,9 +29,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3
 ----------------------PhysicalProject
 ------------------------filter((ws.d_year = 1999) and d_qoy IN (1, 2, 3))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out
index fc988652648dc2..541c967c35d15b 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query38.out
@@ -14,9 +14,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -28,9 +31,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -42,9 +48,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189))
 ----------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query42.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query42.out
index a150f3bf0bfbe2..db30fd50e7466c 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query42.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query42.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 1))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query43.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query43.out
index f92452269bcd32..3e470a62065850 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query43.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query43.out
@@ -10,9 +10,12 @@ PhysicalResultSink
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------filter((date_dim.d_year = 2000))
 --------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query52.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query52.out
index 229d035db52d7d..be5a368feafb41 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query52.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query52.out
@@ -11,9 +11,12 @@ PhysicalResultSink
 ----------------PhysicalProject
 ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+----------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+------------------------hashAgg[GLOBAL]
+--------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------hashAgg[LOCAL]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 ------------------------PhysicalProject
 --------------------------filter((item.i_manager_id = 1))
 ----------------------------PhysicalOlapScan[item]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query59.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query59.out
index 3f740535c8bb6a..3f87d620090469 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query59.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query59.out
@@ -6,9 +6,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------PhysicalProject
-----------------PhysicalOlapScan[store_sales] apply RFs: RF0
+------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[store_sales] apply RFs: RF0
 --------------PhysicalProject
 ----------------PhysicalOlapScan[date_dim]
 --PhysicalResultSink
diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out
index 9f5547c4459a45..aa6a97e5abf73c 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query87.out
@@ -12,9 +12,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -26,9 +29,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -40,9 +46,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202))
 ------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query37.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query37.out
index 99e27bee90be73..be81f11399d997 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query37.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query37.out
@@ -8,20 +8,23 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
---------------------------PhysicalProject
-----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
-------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1
+------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
+--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
 --------------------------PhysicalProject
 ----------------------------filter((item.i_current_price <= 59.00) and (item.i_current_price >= 29.00) and i_manufact_id IN (705, 742, 777, 944))
 ------------------------------PhysicalOlapScan[item]
-----------------------PhysicalProject
-------------------------filter((date_dim.d_date <= '2002-05-28') and (date_dim.d_date >= '2002-03-29'))
---------------------------PhysicalOlapScan[date_dim]
+------------------PhysicalProject
+--------------------filter((date_dim.d_date <= '2002-05-28') and (date_dim.d_date >= '2002-03-29'))
+----------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out
index fc988652648dc2..541c967c35d15b 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query38.out
@@ -14,9 +14,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ws_bill_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -28,9 +31,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189))
 ----------------------------------PhysicalOlapScan[date_dim]
@@ -42,9 +48,12 @@ PhysicalResultSink
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
+----------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ss_sold_date_sk]
+------------------------------hashAgg[GLOBAL]
+--------------------------------PhysicalDistribute[DistributionSpecHash]
+----------------------------------hashAgg[LOCAL]
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF4 RF5
 ------------------------------PhysicalProject
 --------------------------------filter((date_dim.d_month_seq <= 1200) and (date_dim.d_month_seq >= 1189))
 ----------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query82.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query82.out
index 336d4fd5175339..c7c9bfe0d57fe6 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query82.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query82.out
@@ -8,20 +8,23 @@ PhysicalResultSink
 ----------PhysicalDistribute[DistributionSpecHash]
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[inv_date_sk]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[store_sales] apply RFs: RF2
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = inventory.inv_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
---------------------------PhysicalProject
-----------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
-------------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1
+------------------------filter((inventory.inv_quantity_on_hand <= 500) and (inventory.inv_quantity_on_hand >= 100))
+--------------------------PhysicalOlapScan[inventory] apply RFs: RF1 RF2
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
 --------------------------PhysicalProject
 ----------------------------filter((item.i_current_price <= 88.00) and (item.i_current_price >= 58.00) and i_manufact_id IN (259, 485, 559, 580))
 ------------------------------PhysicalOlapScan[item]
-----------------------PhysicalProject
-------------------------filter((date_dim.d_date <= '2001-03-14') and (date_dim.d_date >= '2001-01-13'))
---------------------------PhysicalOlapScan[date_dim]
+------------------PhysicalProject
+--------------------filter((date_dim.d_date <= '2001-03-14') and (date_dim.d_date >= '2001-01-13'))
+----------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out
index 9f5547c4459a45..aa6a97e5abf73c 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query87.out
@@ -12,9 +12,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -26,9 +29,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cs_bill_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202))
 ------------------------------PhysicalOlapScan[date_dim]
@@ -40,9 +46,12 @@ PhysicalResultSink
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ws_bill_customer_sk]
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5
 --------------------------PhysicalProject
 ----------------------------filter((date_dim.d_month_seq <= 1213) and (date_dim.d_month_seq >= 1202))
 ------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out
index 7aaa027dd961ca..ea9e85c3155cdd 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query21.out
@@ -13,7 +13,7 @@ PhysicalResultSink
 --------------------PhysicalProject
 ----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[inv_date_sk]
 ------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((item.i_item_sk = inventory.inv_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
+--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((item.i_item_sk = inventory.inv_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[inv_item_sk]
 ----------------------------PhysicalOlapScan[inventory] apply RFs: RF0 RF1 RF2
 ----------------------------PhysicalProject
 ------------------------------filter((item.i_current_price <= 1.49) and (item.i_current_price >= 0.99))
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out
index 09dedb98772f96..93c02aab1654c3 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query22.out
@@ -10,7 +10,7 @@ PhysicalResultSink
 --------------hashAgg[LOCAL]
 ----------------PhysicalRepeat
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[inv_item_sk]
 ----------------------PhysicalProject
 ------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[inv_date_sk]
 --------------------------PhysicalProject
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out
index b826c2ac59911a..936364b59ddc11 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query39.out
@@ -6,7 +6,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
 ------filter(( not (mean = 0.0)) and ((foo.stdev / foo.mean) > 1.0))
 --------hashAgg[GLOBAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[inv_item_sk]
+------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((inventory.inv_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[inv_item_sk]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((inventory.inv_warehouse_sk = warehouse.w_warehouse_sk)) otherCondition=() build RFs:RF1 w_warehouse_sk->[inv_warehouse_sk]
 ------------------PhysicalProject
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out
index f4faa729b2052d..c74e0d87496087 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query5.out
@@ -62,12 +62,13 @@ PhysicalResultSink
 --------------------------------------PhysicalDistribute[DistributionSpecExecutionAny]
 ----------------------------------------PhysicalProject
 ------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7
---------------------------------------PhysicalProject
-----------------------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_returns.wr_item_sk = web_sales.ws_item_sk) and (web_returns.wr_order_number = web_sales.ws_order_number)) otherCondition=() build RFs:RF4 wr_item_sk->[ws_item_sk];RF5 wr_order_number->[ws_order_number]
-------------------------------------------PhysicalProject
---------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 RF7
-------------------------------------------PhysicalProject
---------------------------------------------PhysicalOlapScan[web_returns] apply RFs: RF6
+--------------------------------------PhysicalDistribute[DistributionSpecExecutionAny]
+----------------------------------------PhysicalProject
+------------------------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_returns.wr_item_sk = web_sales.ws_item_sk) and (web_returns.wr_order_number = web_sales.ws_order_number)) otherCondition=() build RFs:RF4 wr_item_sk->[ws_item_sk];RF5 wr_order_number->[ws_order_number]
+--------------------------------------------PhysicalProject
+----------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 RF5 RF7
+--------------------------------------------PhysicalProject
+----------------------------------------------PhysicalOlapScan[web_returns] apply RFs: RF6
 ------------------------------------PhysicalProject
 --------------------------------------filter((date_dim.d_date <= '2000-09-02') and (date_dim.d_date >= '2000-08-19'))
 ----------------------------------------PhysicalOlapScan[date_dim]
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out
index a3b55973d303c7..5affa616f83ae3 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query78.out
@@ -29,16 +29,16 @@ PhysicalResultSink
 ----------------------PhysicalDistribute[DistributionSpecHash]
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[LEFT_ANTI_JOIN shuffle] hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and (web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
+----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
 ------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
+--------------------------------hashJoin[LEFT_ANTI_JOIN bucketShuffle] hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and (web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
 ----------------------------------PhysicalProject
 ------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 1998))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------PhysicalOlapScan[web_returns]
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[web_returns]
+--------------------------------filter((date_dim.d_year = 1998))
+----------------------------------PhysicalOlapScan[date_dim]
 --------------PhysicalProject
 ----------------hashAgg[GLOBAL]
 ------------------PhysicalDistribute[DistributionSpecHash]
diff --git a/regression-test/data/shape_check/tpch_sf1000/hint/q10.out b/regression-test/data/shape_check/tpch_sf1000/hint/q10.out
index 6401919d9fa3e0..4c51f9cfec6ae4 100644
--- a/regression-test/data/shape_check/tpch_sf1000/hint/q10.out
+++ b/regression-test/data/shape_check/tpch_sf1000/hint/q10.out
@@ -9,9 +9,10 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=()
-------------------PhysicalProject
---------------------filter((lineitem.l_returnflag = 'R'))
-----------------------PhysicalOlapScan[lineitem]
+------------------hashAgg[GLOBAL]
+--------------------PhysicalProject
+----------------------filter((lineitem.l_returnflag = 'R'))
+------------------------PhysicalOlapScan[lineitem]
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_nationkey = nation.n_nationkey)) otherCondition=()
 ----------------------PhysicalProject
diff --git a/regression-test/data/shape_check/tpch_sf1000/hint/q12.out b/regression-test/data/shape_check/tpch_sf1000/hint/q12.out
index bd619416286f66..e347b5fa0b7c2c 100644
--- a/regression-test/data/shape_check/tpch_sf1000/hint/q12.out
+++ b/regression-test/data/shape_check/tpch_sf1000/hint/q12.out
@@ -9,8 +9,9 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN colocated] hashCondition=((orders.o_orderkey = lineitem.l_orderkey)) otherCondition=()
-------------------PhysicalProject
---------------------PhysicalOlapScan[orders]
+------------------hashAgg[GLOBAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[orders]
 ------------------PhysicalProject
 --------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP'))
 ----------------------PhysicalOlapScan[lineitem]
diff --git a/regression-test/data/shape_check/tpch_sf1000/hint/q19.out b/regression-test/data/shape_check/tpch_sf1000/hint/q19.out
index 8cfb81dcef67cb..2b60cbf279c101 100644
--- a/regression-test/data/shape_check/tpch_sf1000/hint/q19.out
+++ b/regression-test/data/shape_check/tpch_sf1000/hint/q19.out
@@ -6,9 +6,12 @@ PhysicalResultSink
 ------hashAgg[LOCAL]
 --------PhysicalProject
 ----------hashJoin[INNER_JOIN broadcast] hashCondition=((part.p_partkey = lineitem.l_partkey)) otherCondition=(OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(lineitem.l_quantity <= 11.00),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(lineitem.l_quantity >= 10.00),(lineitem.l_quantity <= 20.00),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG'),(lineitem.l_quantity >= 20.00)]])
-------------PhysicalProject
---------------filter((lineitem.l_quantity <= 30.00) and (lineitem.l_quantity >= 1.00) and (lineitem.l_shipinstruct = 'DELIVER IN PERSON') and l_shipmode IN ('AIR REG', 'AIR'))
-----------------PhysicalOlapScan[lineitem]
+------------hashAgg[GLOBAL]
+--------------PhysicalDistribute[DistributionSpecHash]
+----------------hashAgg[LOCAL]
+------------------PhysicalProject
+--------------------filter((lineitem.l_quantity <= 30.00) and (lineitem.l_quantity >= 1.00) and (lineitem.l_shipinstruct = 'DELIVER IN PERSON') and l_shipmode IN ('AIR REG', 'AIR'))
+----------------------PhysicalOlapScan[lineitem]
 ------------PhysicalProject
 --------------filter((part.p_size <= 15) and (part.p_size >= 1) and OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG')]] and p_brand IN ('Brand#12', 'Brand#23', 'Brand#34') and p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG', 'MED BAG', 'MED BOX', 'MED PACK', 'MED PKG', 'SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'))
 ----------------PhysicalOlapScan[part]
diff --git a/regression-test/data/shape_check/tpch_sf1000/hint/q3.out b/regression-test/data/shape_check/tpch_sf1000/hint/q3.out
index d4cf366b41a7d2..fa0190006f1946 100644
--- a/regression-test/data/shape_check/tpch_sf1000/hint/q3.out
+++ b/regression-test/data/shape_check/tpch_sf1000/hint/q3.out
@@ -7,9 +7,10 @@ PhysicalResultSink
 --------hashAgg[GLOBAL]
 ----------PhysicalProject
 ------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=()
---------------PhysicalProject
-----------------filter((lineitem.l_shipdate > '1995-03-15'))
-------------------PhysicalOlapScan[lineitem]
+--------------hashAgg[GLOBAL]
+----------------PhysicalProject
+------------------filter((lineitem.l_shipdate > '1995-03-15'))
+--------------------PhysicalOlapScan[lineitem]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=()
 ------------------PhysicalProject
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q10.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q10.out
index 860280d12dd4cb..dd24288ff8772a 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q10.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q10.out
@@ -5,20 +5,23 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecGather]
 ------PhysicalTopN[LOCAL_SORT]
 --------hashAgg[GLOBAL]
-----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_nationkey = nation.n_nationkey)) otherCondition=()
+----------PhysicalDistribute[DistributionSpecHash]
+------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=() build RFs:RF1 o_custkey->[c_custkey]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_nationkey = nation.n_nationkey)) otherCondition=()
 ------------------PhysicalProject
---------------------PhysicalOlapScan[customer] apply RFs: RF1
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN colocated] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=() build RFs:RF0 o_orderkey->[l_orderkey]
-----------------------PhysicalProject
-------------------------filter((lineitem.l_returnflag = 'R'))
---------------------------PhysicalOlapScan[lineitem] apply RFs: RF0
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalProject
+--------------------------filter((lineitem.l_returnflag = 'R'))
+----------------------------PhysicalOlapScan[lineitem] apply RFs: RF1
 ----------------------PhysicalProject
-------------------------filter((orders.o_orderdate < '1994-01-01') and (orders.o_orderdate >= '1993-10-01'))
---------------------------PhysicalOlapScan[orders]
---------------PhysicalProject
-----------------PhysicalOlapScan[nation]
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=() build RFs:RF0 o_custkey->[c_custkey]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[customer] apply RFs: RF0
+--------------------------PhysicalProject
+----------------------------filter((orders.o_orderdate < '1994-01-01') and (orders.o_orderdate >= '1993-10-01'))
+------------------------------PhysicalOlapScan[orders]
+------------------PhysicalProject
+--------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q11.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q11.out
index 32ac3f813c6280..46d681549ebf3d 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q11.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q11.out
@@ -24,14 +24,17 @@ PhysicalResultSink
 ----------------PhysicalDistribute[DistributionSpecGather]
 ------------------hashAgg[LOCAL]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() build RFs:RF1 s_suppkey->[ps_suppkey]
+----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF1 n_nationkey->[s_nationkey]
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[partsupp] apply RFs: RF1
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF0 n_nationkey->[s_nationkey]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[supplier] apply RFs: RF0
+--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() build RFs:RF0 s_suppkey->[ps_suppkey]
+----------------------------hashAgg[GLOBAL]
+------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------hashAgg[LOCAL]
+----------------------------------PhysicalProject
+------------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0
 ----------------------------PhysicalProject
-------------------------------filter((nation.n_name = 'GERMANY'))
---------------------------------PhysicalOlapScan[nation]
+------------------------------PhysicalOlapScan[supplier] apply RFs: RF1
+------------------------PhysicalProject
+--------------------------filter((nation.n_name = 'GERMANY'))
+----------------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out
index 8df830dd428e58..dc6bf364a29db9 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q12.out
@@ -9,8 +9,9 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN colocated] hashCondition=((orders.o_orderkey = lineitem.l_orderkey)) otherCondition=() build RFs:RF0 l_orderkey->[o_orderkey]
-------------------PhysicalProject
---------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------hashAgg[GLOBAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[orders] apply RFs: RF0
 ------------------PhysicalProject
 --------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP'))
 ----------------------PhysicalOlapScan[lineitem]
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out
index 6df1a05fa3b57f..620fc63f7209f1 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q14.out
@@ -10,6 +10,10 @@ PhysicalResultSink
 --------------PhysicalProject
 ----------------PhysicalOlapScan[part] apply RFs: RF0
 --------------PhysicalProject
-----------------filter((lineitem.l_shipdate < '1995-10-01') and (lineitem.l_shipdate >= '1995-09-01'))
-------------------PhysicalOlapScan[lineitem]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------filter((lineitem.l_shipdate < '1995-10-01') and (lineitem.l_shipdate >= '1995-09-01'))
+--------------------------PhysicalOlapScan[lineitem]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q19.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q19.out
index 78faf3234691b3..41356bdc89baa1 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q19.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q19.out
@@ -5,10 +5,13 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecGather]
 ------hashAgg[LOCAL]
 --------PhysicalProject
-----------hashJoin[INNER_JOIN broadcast] hashCondition=((part.p_partkey = lineitem.l_partkey)) otherCondition=(OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(lineitem.l_quantity <= 11.00),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(lineitem.l_quantity >= 10.00),(lineitem.l_quantity <= 20.00),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG'),(lineitem.l_quantity >= 20.00)]]) build RFs:RF0 p_partkey->[l_partkey]
-------------PhysicalProject
---------------filter((lineitem.l_quantity <= 30.00) and (lineitem.l_quantity >= 1.00) and (lineitem.l_shipinstruct = 'DELIVER IN PERSON') and l_shipmode IN ('AIR REG', 'AIR'))
-----------------PhysicalOlapScan[lineitem] apply RFs: RF0
+----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((part.p_partkey = lineitem.l_partkey)) otherCondition=(OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(lineitem.l_quantity <= 11.00),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(lineitem.l_quantity >= 10.00),(lineitem.l_quantity <= 20.00),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG'),(lineitem.l_quantity >= 20.00)]]) build RFs:RF0 p_partkey->[l_partkey]
+------------hashAgg[GLOBAL]
+--------------PhysicalDistribute[DistributionSpecHash]
+----------------hashAgg[LOCAL]
+------------------PhysicalProject
+--------------------filter((lineitem.l_quantity <= 30.00) and (lineitem.l_quantity >= 1.00) and (lineitem.l_shipinstruct = 'DELIVER IN PERSON') and l_shipmode IN ('AIR REG', 'AIR'))
+----------------------PhysicalOlapScan[lineitem] apply RFs: RF0
 ------------PhysicalProject
 --------------filter((part.p_size <= 15) and (part.p_size >= 1) and OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG')]] and p_brand IN ('Brand#12', 'Brand#23', 'Brand#34') and p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG', 'MED BAG', 'MED BOX', 'MED PACK', 'MED PKG', 'SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'))
 ----------------PhysicalOlapScan[part]
diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q3.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q3.out
index 3474f8dcf010c3..4b45fb5d8de73b 100644
--- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q3.out
+++ b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q3.out
@@ -7,9 +7,10 @@ PhysicalResultSink
 --------hashAgg[GLOBAL]
 ----------PhysicalProject
 ------------hashJoin[INNER_JOIN colocated] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey]
---------------PhysicalProject
-----------------filter((lineitem.l_shipdate > '1995-03-15'))
-------------------PhysicalOlapScan[lineitem] apply RFs: RF1
+--------------hashAgg[GLOBAL]
+----------------PhysicalProject
+------------------filter((lineitem.l_shipdate > '1995-03-15'))
+--------------------PhysicalOlapScan[lineitem] apply RFs: RF1
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=() build RFs:RF0 c_custkey->[o_custkey]
 ------------------PhysicalProject
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q10.out b/regression-test/data/shape_check/tpch_sf1000/shape/q10.out
index 952ec22ca66935..cf4f3a3ed8d63d 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q10.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q10.out
@@ -5,20 +5,23 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecGather]
 ------PhysicalTopN[LOCAL_SORT]
 --------hashAgg[GLOBAL]
-----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[c_nationkey]
+----------PhysicalDistribute[DistributionSpecHash]
+------------hashAgg[LOCAL]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=() build RFs:RF1 o_custkey->[c_custkey]
+----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF2 n_nationkey->[c_nationkey]
 ------------------PhysicalProject
---------------------PhysicalOlapScan[customer] apply RFs: RF1 RF2
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN colocated] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=() build RFs:RF0 o_orderkey->[l_orderkey]
-----------------------PhysicalProject
-------------------------filter((lineitem.l_returnflag = 'R'))
---------------------------PhysicalOlapScan[lineitem] apply RFs: RF0
+--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalProject
+--------------------------filter((lineitem.l_returnflag = 'R'))
+----------------------------PhysicalOlapScan[lineitem] apply RFs: RF1
 ----------------------PhysicalProject
-------------------------filter((orders.o_orderdate < '1994-01-01') and (orders.o_orderdate >= '1993-10-01'))
---------------------------PhysicalOlapScan[orders]
---------------PhysicalProject
-----------------PhysicalOlapScan[nation]
+------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=() build RFs:RF0 o_custkey->[c_custkey]
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[customer] apply RFs: RF0 RF2
+--------------------------PhysicalProject
+----------------------------filter((orders.o_orderdate < '1994-01-01') and (orders.o_orderdate >= '1993-10-01'))
+------------------------------PhysicalOlapScan[orders]
+------------------PhysicalProject
+--------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q11.out b/regression-test/data/shape_check/tpch_sf1000/shape/q11.out
index 32ac3f813c6280..46d681549ebf3d 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q11.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q11.out
@@ -24,14 +24,17 @@ PhysicalResultSink
 ----------------PhysicalDistribute[DistributionSpecGather]
 ------------------hashAgg[LOCAL]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() build RFs:RF1 s_suppkey->[ps_suppkey]
+----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF1 n_nationkey->[s_nationkey]
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[partsupp] apply RFs: RF1
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((supplier.s_nationkey = nation.n_nationkey)) otherCondition=() build RFs:RF0 n_nationkey->[s_nationkey]
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[supplier] apply RFs: RF0
+--------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() build RFs:RF0 s_suppkey->[ps_suppkey]
+----------------------------hashAgg[GLOBAL]
+------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------hashAgg[LOCAL]
+----------------------------------PhysicalProject
+------------------------------------PhysicalOlapScan[partsupp] apply RFs: RF0
 ----------------------------PhysicalProject
-------------------------------filter((nation.n_name = 'GERMANY'))
---------------------------------PhysicalOlapScan[nation]
+------------------------------PhysicalOlapScan[supplier] apply RFs: RF1
+------------------------PhysicalProject
+--------------------------filter((nation.n_name = 'GERMANY'))
+----------------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q12.out b/regression-test/data/shape_check/tpch_sf1000/shape/q12.out
index 8df830dd428e58..dc6bf364a29db9 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q12.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q12.out
@@ -9,8 +9,9 @@ PhysicalResultSink
 ------------hashAgg[LOCAL]
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN colocated] hashCondition=((orders.o_orderkey = lineitem.l_orderkey)) otherCondition=() build RFs:RF0 l_orderkey->[o_orderkey]
-------------------PhysicalProject
---------------------PhysicalOlapScan[orders] apply RFs: RF0
+------------------hashAgg[GLOBAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[orders] apply RFs: RF0
 ------------------PhysicalProject
 --------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP'))
 ----------------------PhysicalOlapScan[lineitem]
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q14.out b/regression-test/data/shape_check/tpch_sf1000/shape/q14.out
index 6df1a05fa3b57f..c3cf6656e5c590 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q14.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q14.out
@@ -9,7 +9,10 @@ PhysicalResultSink
 ------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((lineitem.l_partkey = part.p_partkey)) otherCondition=() build RFs:RF0 l_partkey->[p_partkey]
 --------------PhysicalProject
 ----------------PhysicalOlapScan[part] apply RFs: RF0
---------------PhysicalProject
-----------------filter((lineitem.l_shipdate < '1995-10-01') and (lineitem.l_shipdate >= '1995-09-01'))
-------------------PhysicalOlapScan[lineitem]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------filter((lineitem.l_shipdate < '1995-10-01') and (lineitem.l_shipdate >= '1995-09-01'))
+------------------------PhysicalOlapScan[lineitem]
 
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q19.out b/regression-test/data/shape_check/tpch_sf1000/shape/q19.out
index 78faf3234691b3..41356bdc89baa1 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q19.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q19.out
@@ -5,10 +5,13 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecGather]
 ------hashAgg[LOCAL]
 --------PhysicalProject
-----------hashJoin[INNER_JOIN broadcast] hashCondition=((part.p_partkey = lineitem.l_partkey)) otherCondition=(OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(lineitem.l_quantity <= 11.00),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(lineitem.l_quantity >= 10.00),(lineitem.l_quantity <= 20.00),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG'),(lineitem.l_quantity >= 20.00)]]) build RFs:RF0 p_partkey->[l_partkey]
-------------PhysicalProject
---------------filter((lineitem.l_quantity <= 30.00) and (lineitem.l_quantity >= 1.00) and (lineitem.l_shipinstruct = 'DELIVER IN PERSON') and l_shipmode IN ('AIR REG', 'AIR'))
-----------------PhysicalOlapScan[lineitem] apply RFs: RF0
+----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((part.p_partkey = lineitem.l_partkey)) otherCondition=(OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(lineitem.l_quantity <= 11.00),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(lineitem.l_quantity >= 10.00),(lineitem.l_quantity <= 20.00),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG'),(lineitem.l_quantity >= 20.00)]]) build RFs:RF0 p_partkey->[l_partkey]
+------------hashAgg[GLOBAL]
+--------------PhysicalDistribute[DistributionSpecHash]
+----------------hashAgg[LOCAL]
+------------------PhysicalProject
+--------------------filter((lineitem.l_quantity <= 30.00) and (lineitem.l_quantity >= 1.00) and (lineitem.l_shipinstruct = 'DELIVER IN PERSON') and l_shipmode IN ('AIR REG', 'AIR'))
+----------------------PhysicalOlapScan[lineitem] apply RFs: RF0
 ------------PhysicalProject
 --------------filter((part.p_size <= 15) and (part.p_size >= 1) and OR[AND[(part.p_brand = 'Brand#12'),p_container IN ('SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'),(part.p_size <= 5)],AND[(part.p_brand = 'Brand#23'),p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG'),(part.p_size <= 10)],AND[(part.p_brand = 'Brand#34'),p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG')]] and p_brand IN ('Brand#12', 'Brand#23', 'Brand#34') and p_container IN ('LG BOX', 'LG CASE', 'LG PACK', 'LG PKG', 'MED BAG', 'MED BOX', 'MED PACK', 'MED PKG', 'SM BOX', 'SM CASE', 'SM PACK', 'SM PKG'))
 ----------------PhysicalOlapScan[part]
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q3.out b/regression-test/data/shape_check/tpch_sf1000/shape/q3.out
index 3474f8dcf010c3..4b45fb5d8de73b 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q3.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q3.out
@@ -7,9 +7,10 @@ PhysicalResultSink
 --------hashAgg[GLOBAL]
 ----------PhysicalProject
 ------------hashJoin[INNER_JOIN colocated] hashCondition=((lineitem.l_orderkey = orders.o_orderkey)) otherCondition=() build RFs:RF1 o_orderkey->[l_orderkey]
---------------PhysicalProject
-----------------filter((lineitem.l_shipdate > '1995-03-15'))
-------------------PhysicalOlapScan[lineitem] apply RFs: RF1
+--------------hashAgg[GLOBAL]
+----------------PhysicalProject
+------------------filter((lineitem.l_shipdate > '1995-03-15'))
+--------------------PhysicalOlapScan[lineitem] apply RFs: RF1
 --------------PhysicalProject
 ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=() build RFs:RF0 c_custkey->[o_custkey]
 ------------------PhysicalProject

From 5b5e2a12dbbb014026e0ad4accffb56959e33da7 Mon Sep 17 00:00:00 2001
From: englefly 
Date: Thu, 15 Jan 2026 11:46:28 +0800
Subject: [PATCH 17/21] DORIS-24150

---
 .../nereids/rules/rewrite/AdjustNullable.java | 16 ++--------------
 .../eageraggregation/PushDownAggregation.java | 19 ++++++++++++++-----
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java
index 14973c986f888c..204a018fbc7cb5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java
@@ -80,20 +80,8 @@ public class AdjustNullable extends DefaultPlanRewriter> imple
 
     private final boolean isAnalyzedPhase;
 
-    /**
-     * When check is true, if we find a slot that is non-nullable in the plan,
-     * but we infer it should be nullable from the plan's subtree, and fe_debug is true,
-     * then throw an exception.
-     */
-    private final boolean check;
-
-    public AdjustNullable(boolean isAnalyzedPhase, boolean check) {
-        this.isAnalyzedPhase = isAnalyzedPhase;
-        this.check = check;
-    }
-
     public AdjustNullable(boolean isAnalyzedPhase) {
-        this(isAnalyzedPhase, !isAnalyzedPhase);
+        this.isAnalyzedPhase = isAnalyzedPhase;
     }
 
     @Override
@@ -508,7 +496,7 @@ private  Optional updateExpression(Optional input,
     private  Optional updateExpression(T input,
             Map replaceMap, boolean debugCheck) {
         AtomicBoolean changed = new AtomicBoolean(false);
-        Expression replaced = doUpdateExpression(changed, input, replaceMap, check && debugCheck);
+        Expression replaced = doUpdateExpression(changed, input, replaceMap, !isAnalyzedPhase && debugCheck);
         return changed.get() ? Optional.of((T) replaced) : Optional.empty();
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
index d8ff3ce5c3e510..ef1328c333dbc8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
@@ -34,6 +34,7 @@
 
 package org.apache.doris.nereids.rules.rewrite.eageraggregation;
 
+import org.apache.doris.common.NereidsException;
 import org.apache.doris.nereids.jobs.JobContext;
 import org.apache.doris.nereids.rules.analysis.NormalizeAggregate;
 import org.apache.doris.nereids.rules.rewrite.AdjustNullable;
@@ -51,7 +52,6 @@
 import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
 import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
 import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
-import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
 import org.apache.doris.nereids.trees.plans.logical.LogicalRelation;
 import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
@@ -94,11 +94,22 @@ public class PushDownAggregation extends DefaultPlanRewriter impleme
 
     @Override
     public Plan rewriteRoot(Plan plan, JobContext jobContext) {
+        if (SessionVariable.isFeDebug()) {
+            try {
+                new AdjustNullable(false).rewriteRoot(plan, null);
+            } catch (Exception e) {
+                throw new NereidsException("(PushDownAggregation) input plan has nullable problem", e);
+            }
+        }
         int mode = SessionVariable.getEagerAggregationMode();
         if (mode < 0) {
             return plan;
         } else {
-            return plan.accept(this, jobContext);
+            Plan result = plan.accept(this, jobContext);
+            if (SessionVariable.isFeDebug()) {
+                result = new AdjustNullable(true).rewriteRoot(result, null);
+            }
+            return result;
         }
     }
 
@@ -185,10 +196,8 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                 LogicalAggregate eagerAgg =
                         agg.withAggOutputChild(newOutputExpressions, child);
                 NormalizeAggregate normalizeAggregate = new NormalizeAggregate();
-                LogicalPlan normalized = normalizeAggregate.normalizeAgg(eagerAgg, Optional.empty(),
+                return normalizeAggregate.normalizeAgg(eagerAgg, Optional.empty(),
                         context.getCascadesContext());
-                AdjustNullable adjustNullable = new AdjustNullable(false, false);
-                return adjustNullable.rewriteRoot(normalized, null);
             }
         } catch (RuntimeException e) {
             String msg = "PushDownAggregation failed: " + e.getMessage() + "\n" + agg.treeString();

From 29da6a9e6f222e0110ed142022ce12e5d928e138 Mon Sep 17 00:00:00 2001
From: englefly 
Date: Wed, 14 Jan 2026 14:52:09 +0800
Subject: [PATCH 18/21] =?UTF-8?q?1.=20exprId=20=E7=9A=84=E7=AD=89=E5=80=BC?=
 =?UTF-8?q?=E5=88=A4=E6=96=AD,=202.update=20rt.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../eageraggregation/EagerAggRewriter.java    |  2 +-
 .../data/nereids_p0/eager_agg/eager_agg.out   | 63 ++++++++++---------
 .../nereids_p0/eager_agg/eager_agg.groovy     |  9 ++-
 3 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
index 32db11e6cfa6e8..b7122fc4a8f89a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriter.java
@@ -227,7 +227,7 @@ private boolean canPushThroughProject(LogicalProject project, Pu
         // if x is not used as group key, do not push through
         for (Slot slot : context.getAggFunctionsInputSlots()) {
             for (NamedExpression prj : project.getProjects()) {
-                if (prj instanceof Alias && prj.getExprId() == slot.getExprId()) {
+                if (prj instanceof Alias && prj.getExprId().equals(slot.getExprId())) {
                     if (prj.getInputSlots().stream()
                             .anyMatch(
                                     s -> project.getOutputSet().contains(s)
diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
index c1d7ed845a0749..0a725fae0c7a69 100644
--- a/regression-test/data/nereids_p0/eager_agg/eager_agg.out
+++ b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
@@ -5,14 +5,14 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecHash]
 ------hashAgg[LOCAL]
 --------PhysicalProject
-----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------hashAgg[GLOBAL]
 --------------PhysicalDistribute[DistributionSpecHash]
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales(ss)]
+------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -31,14 +31,14 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecHash]
 ------hashAgg[LOCAL]
 --------PhysicalProject
-----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------hashAgg[GLOBAL]
 --------------PhysicalDistribute[DistributionSpecHash]
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales(ss)]
+------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -57,14 +57,14 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecHash]
 ------hashAgg[LOCAL]
 --------PhysicalProject
-----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------hashAgg[GLOBAL]
 --------------PhysicalDistribute[DistributionSpecHash]
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales(ss)]
+------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
 ----------------------PhysicalProject
 ------------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -83,11 +83,11 @@ PhysicalResultSink
 ----PhysicalDistribute[DistributionSpecHash]
 ------hashAgg[LOCAL]
 --------PhysicalProject
-----------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+----------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------PhysicalProject
---------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+--------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
 ----------------PhysicalProject
-------------------PhysicalOlapScan[store_sales(ss)]
+------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
 ----------------PhysicalProject
 ------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -107,14 +107,14 @@ PhysicalResultSink
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF2 ws_item_sk->[ss_item_sk];RF3 ws_item_sk->[ss_item_sk]
 --------------hashAgg[GLOBAL]
 ----------------PhysicalDistribute[DistributionSpecHash]
 ------------------hashAgg[LOCAL]
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales(ss)]
+--------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF2 RF3
 ------------------------PhysicalProject
 --------------------------PhysicalOlapScan[date_dim(dt)]
 --------------PhysicalProject
@@ -134,14 +134,14 @@ PhysicalResultSink
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF2 ws_item_sk->[ss_item_sk];RF3 ws_item_sk->[ss_item_sk]
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN broadcast] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+----------------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------------hashAgg[GLOBAL]
 --------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------hashAgg[LOCAL]
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales(ss)]
+--------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF2 RF3
 ------------------PhysicalProject
 --------------------PhysicalOlapScan[date_dim(dt)]
 --------------PhysicalProject
@@ -158,18 +158,25 @@ SyntaxError:
 PhysicalResultSink
 --PhysicalProject
 ----hashAgg[GLOBAL]
-------PhysicalProject
---------hashJoin[INNER_JOIN broadcast] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=()
+------PhysicalDistribute[DistributionSpecHash]
+--------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
+------------hashJoin[INNER_JOIN shuffle] hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk)) otherCondition=()
 --------------PhysicalProject
-----------------PhysicalOlapScan[item]
---------------hashAgg[GLOBAL]
-----------------PhysicalProject
-------------------PhysicalOlapScan[web_sales]
-----------PhysicalProject
-------------PhysicalOlapScan[date_dim]
+----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=()
+------------------hashAgg[GLOBAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[web_sales]
+------------------PhysicalProject
+--------------------PhysicalOlapScan[item]
+--------------PhysicalProject
+----------------PhysicalOlapScan[date_dim]
 
--- !sum_if_push --
+Hint log:
+Used: leading({ web_sales item } date_dim )
+UnUsed:
+SyntaxError:
+
+-- !sum_if_push_exe --
 1	\N	\N	\N	\N	\N	\N
 
diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
index b59569ab2656f5..0f51e4d1961e2d 100644
--- a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
+++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
@@ -19,6 +19,9 @@ suite("eager_agg") {
     sql """
         set eager_aggregation_mode=1;
         set eager_aggregation_on_join=true;
+        set runtime_filter_mode=0;
+        set broadcast_row_count_limit=-1;
+        set disable_nereids_rules="SALT_JOIN";
     """
 
     // push to ss-join-ws
@@ -179,7 +182,7 @@ suite("eager_agg") {
 
     qt_sum_if_push """
         explain shape plan
-        select d_week_seq,
+        select /*+leading({web_sales item} date_dim)*/ d_week_seq,
                 sum(case when (d_day_name='Monday') then ws_sales_price else null end) mon_sales,
                 sum(case when (d_day_name='Tuesday') then ws_sales_price else  null end) tue_sales,
                 sum(case when (d_day_name='Wednesday') then ws_sales_price else null end) wed_sales,
@@ -191,8 +194,8 @@ suite("eager_agg") {
         group by d_week_seq, ws_item_sk;
         """
 
-    qt_sum_if_push """
-        select d_week_seq,
+    qt_sum_if_push_exe """
+        select /*+leading({web_sales item} date_dim)*/ d_week_seq,
                 sum(case when (d_day_name='Monday') then ws_sales_price else null end) mon_sales,
                 sum(case when (d_day_name='Tuesday') then ws_sales_price else  null end) tue_sales,
                 sum(case when (d_day_name='Wednesday') then ws_sales_price else null end) wed_sales,

From b53da7ff428a0f59f85e5d487c0bd51012b16155 Mon Sep 17 00:00:00 2001
From: englefly 
Date: Thu, 15 Jan 2026 12:47:18 +0800
Subject: [PATCH 19/21] doris-24150 rt case

---
 .../data/nereids_p0/eager_agg/eager_agg.out   | 45 +++++++++++++------
 .../nereids_p0/eager_agg/eager_agg.groovy     | 21 +++++++++
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
index 0a725fae0c7a69..85cd1286153c0c 100644
--- a/regression-test/data/nereids_p0/eager_agg/eager_agg.out
+++ b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
@@ -10,9 +10,9 @@ PhysicalResultSink
 --------------PhysicalDistribute[DistributionSpecHash]
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
+--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
+------------------------PhysicalOlapScan[store_sales(ss)]
 ----------------------PhysicalProject
 ------------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -36,9 +36,9 @@ PhysicalResultSink
 --------------PhysicalDistribute[DistributionSpecHash]
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
+--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
+------------------------PhysicalOlapScan[store_sales(ss)]
 ----------------------PhysicalProject
 ------------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -62,9 +62,9 @@ PhysicalResultSink
 --------------PhysicalDistribute[DistributionSpecHash]
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
+--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
+------------------------PhysicalOlapScan[store_sales(ss)]
 ----------------------PhysicalProject
 ------------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -85,9 +85,9 @@ PhysicalResultSink
 --------PhysicalProject
 ----------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------PhysicalProject
---------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF0 ws_item_sk->[ss_item_sk];RF1 ws_item_sk->[ss_item_sk]
+--------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
 ----------------PhysicalProject
-------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF0 RF1
+------------------PhysicalOlapScan[store_sales(ss)]
 ----------------PhysicalProject
 ------------------PhysicalOlapScan[web_sales(ws)]
 ------------PhysicalProject
@@ -107,14 +107,14 @@ PhysicalResultSink
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF2 ws_item_sk->[ss_item_sk];RF3 ws_item_sk->[ss_item_sk]
+------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
 --------------hashAgg[GLOBAL]
 ----------------PhysicalDistribute[DistributionSpecHash]
 ------------------hashAgg[LOCAL]
 --------------------PhysicalProject
 ----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF2 RF3
+--------------------------PhysicalOlapScan[store_sales(ss)]
 ------------------------PhysicalProject
 --------------------------PhysicalOlapScan[date_dim(dt)]
 --------------PhysicalProject
@@ -134,14 +134,14 @@ PhysicalResultSink
 ------PhysicalDistribute[DistributionSpecHash]
 --------hashAgg[LOCAL]
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=() build RFs:RF2 ws_item_sk->[ss_item_sk];RF3 ws_item_sk->[ss_item_sk]
+------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
 ------------------hashAgg[GLOBAL]
 --------------------PhysicalDistribute[DistributionSpecHash]
 ----------------------hashAgg[LOCAL]
 ------------------------PhysicalProject
---------------------------PhysicalOlapScan[store_sales(ss)] apply RFs: RF2 RF3
+--------------------------PhysicalOlapScan[store_sales(ss)]
 ------------------PhysicalProject
 --------------------PhysicalOlapScan[date_dim(dt)]
 --------------PhysicalProject
@@ -180,3 +180,22 @@ SyntaxError:
 -- !sum_if_push_exe --
 1	\N	\N	\N	\N	\N	\N
 
+-- !check_nullable --
+PhysicalResultSink
+--PhysicalProject
+----hashAgg[GLOBAL]
+------PhysicalDistribute[DistributionSpecHash]
+--------hashAgg[LOCAL]
+----------PhysicalProject
+------------hashJoin[RIGHT_OUTER_JOIN bucketShuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=()
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[store_sales]
+--------------PhysicalProject
+----------------PhysicalOlapScan[date_dim]
+
+-- !check_nullable_exe --
+11.00
+
diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
index 0f51e4d1961e2d..0e6847a38cf183 100644
--- a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
+++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
@@ -206,4 +206,25 @@ suite("eager_agg") {
                         join date_dim on d_date_sk = ws_sold_date_sk
         group by d_week_seq, ws_item_sk;
         """
+
+    qt_check_nullable """
+    explain shape plan
+    select /*+SET_VAR(eager_aggregation_mode=1, disable_join_reorder = true)*/ a + ss_sales_price 
+    from (
+    select sum(case when ss_item_sk =1 then 1 else 0 end) a, ss_sales_price
+    from store_sales 
+      right join date_dim on d_date_sk = ss_sold_date_sk
+    group by ss_sales_price
+    )t;
+    """
+
+    qt_check_nullable_exe """
+    select /*+SET_VAR(eager_aggregation_mode=1, disable_join_reorder = true)*/ a + ss_sales_price 
+    from (
+    select sum(case when ss_item_sk =1 then 1 else 0 end) a, ss_sales_price
+    from store_sales 
+      right join date_dim on d_date_sk = ss_sold_date_sk
+    group by ss_sales_price
+    )t;
+    """
 }

From 16a85802f777805ec0c3ce8fb931f1b29faaa3e6 Mon Sep 17 00:00:00 2001
From: englefly 
Date: Thu, 15 Jan 2026 18:44:48 +0800
Subject: [PATCH 20/21] DORIS-24151

---
 .../eageraggregation/PushDownAggregation.java       |  6 ++++++
 .../data/nereids_p0/eager_agg/eager_agg.out         | 13 +++++++++++++
 .../suites/nereids_p0/eager_agg/eager_agg.groovy    |  9 +++++++++
 3 files changed, 28 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
index ef1328c333dbc8..473a7a04d7eaac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
@@ -143,6 +143,12 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                     && !aggFunction.isDistinct()) {
                 if (aggFunction instanceof Sum && ((Sum) aggFunction).child() instanceof If) {
                     If body = (If) ((Sum) aggFunction).child();
+                    Set valueSlots = Sets.newHashSet(body.getTrueValue().getInputSlots());
+                    valueSlots.addAll(body.getFalseValue().getInputSlots());
+                    if (body.getCondition().getInputSlots().stream().anyMatch(s -> valueSlots.contains(s))) {
+                        // do not push down if the condition and a value branch share a slot, e.g. sum(if(a, a, b))
+                        return agg;
+                    }
                     aggFunctions.add(new Sum(body.getTrueValue()));
                     if (!(body.getFalseValue() instanceof NullLiteral)) {
                         aggFunctions.add(new Sum(body.getFalseValue()));
diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
index 85cd1286153c0c..57c0e6b0db7e0c 100644
--- a/regression-test/data/nereids_p0/eager_agg/eager_agg.out
+++ b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
@@ -199,3 +199,16 @@ PhysicalResultSink
 -- !check_nullable_exe --
 11.00
 
+-- !check_no_push_value_slots_contains_if_slots --
+PhysicalResultSink
+--PhysicalProject
+----hashAgg[GLOBAL]
+------PhysicalDistribute[DistributionSpecHash]
+--------hashAgg[LOCAL]
+----------PhysicalProject
+------------hashJoin[INNER_JOIN shuffle] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=()
+--------------PhysicalProject
+----------------PhysicalOlapScan[store_sales]
+--------------PhysicalProject
+----------------PhysicalOlapScan[date_dim]
+
diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
index 0e6847a38cf183..c924772149facc 100644
--- a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
+++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
@@ -227,4 +227,13 @@ suite("eager_agg") {
     group by ss_sales_price
     )t;
     """
+
+    qt_check_no_push_value_slots_contains_if_slots """
+    explain shape plan
+    select /*+SET_VAR(eager_aggregation_mode=1, disable_join_reorder = false)*/ 
+        sum(case when ss_item_sk =1 then ss_item_sk else 0 end) a 
+    from store_sales 
+      join  date_dim on d_date_sk = ss_sold_date_sk
+    group by d_year;
+    """
 }

From f6e321f9871add6316aa107f3cdd48ec3b4b3694 Mon Sep 17 00:00:00 2001
From: englefly 
Date: Fri, 16 Jan 2026 10:19:32 +0800
Subject: [PATCH 21/21] a

---
 ddl.sql                                       | 247 ++++++++++++++++++
 .../eageraggregation/PushDownAggregation.java |   2 +-
 test.sql                                      |  13 +
 3 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 ddl.sql
 create mode 100644 test.sql

diff --git a/ddl.sql b/ddl.sql
new file mode 100644
index 00000000000000..ed47bc6d7659d0
--- /dev/null
+++ b/ddl.sql
@@ -0,0 +1,247 @@
+
+drop database if exists rqg;
+create database rqg;
+use rqg;
+
+drop table if exists store_sales;
+  drop table if exists date_dim;
+  drop table if exists web_sales;
+  
+  CREATE TABLE `store_sales` (
+  `ss_sold_date_sk` bigint NULL,
+  `ss_sold_time_sk` bigint NULL,
+  `ss_item_sk` bigint NULL,
+  `ss_customer_sk` bigint NULL,
+  `ss_cdemo_sk` bigint NULL,
+  `ss_hdemo_sk` bigint NULL,
+  `ss_addr_sk` bigint NULL,
+  `ss_store_sk` bigint NULL,
+  `ss_promo_sk` bigint NULL,
+  `ss_ticket_number` bigint NULL,
+  `ss_quantity` int NULL,
+  `ss_wholesale_cost` decimal(7,2) NULL,
+  `ss_list_price` decimal(7,2) NULL,
+  `ss_sales_price` decimal(7,2) NULL,
+  `ss_ext_discount_amt` decimal(7,2) NULL,
+  `ss_ext_sales_price` decimal(7,2) NULL,
+  `ss_ext_wholesale_cost` decimal(7,2) NULL,
+  `ss_ext_list_price` decimal(7,2) NULL,
+  `ss_ext_tax` decimal(7,2) NULL,
+  `ss_coupon_amt` decimal(7,2) NULL,
+  `ss_net_paid` decimal(7,2) NULL,
+  `ss_net_paid_inc_tax` decimal(7,2) NULL,
+  `ss_net_profit` decimal(7,2) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`ss_sold_date_sk`, `ss_sold_time_sk`, `ss_item_sk`, `ss_customer_sk`)
+DISTRIBUTED BY HASH(`ss_customer_sk`) BUCKETS 3
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V3",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+
+CREATE TABLE `date_dim` (
+  `d_date_sk` bigint NULL,
+  `d_date_id` char(16) NULL,
+  `d_date` date NULL,
+  `d_month_seq` int NULL,
+  `d_week_seq` int NULL,
+  `d_quarter_seq` int NULL,
+  `d_year` int NULL,
+  `d_dow` int NULL,
+  `d_moy` int NULL,
+  `d_dom` int NULL,
+  `d_qoy` int NULL,
+  `d_fy_year` int NULL,
+  `d_fy_quarter_seq` int NULL,
+  `d_fy_week_seq` int NULL,
+  `d_day_name` char(9) NULL,
+  `d_quarter_name` char(6) NULL,
+  `d_holiday` char(1) NULL,
+  `d_weekend` char(1) NULL,
+  `d_following_holiday` char(1) NULL,
+  `d_first_dom` int NULL,
+  `d_last_dom` int NULL,
+  `d_same_day_ly` int NULL,
+  `d_same_day_lq` int NULL,
+  `d_current_day` char(1) NULL,
+  `d_current_week` char(1) NULL,
+  `d_current_month` char(1) NULL,
+  `d_current_quarter` char(1) NULL,
+  `d_current_year` char(1) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`d_date_sk`, `d_date_id`)
+DISTRIBUTED BY HASH(`d_date_id`) BUCKETS 3
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V3",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+
+CREATE TABLE `web_sales` (
+  `ws_sold_date_sk` bigint NULL,
+  `ws_sold_time_sk` bigint NULL,
+  `ws_ship_date_sk` bigint NULL,
+  `ws_item_sk` bigint NULL,
+  `ws_bill_customer_sk` bigint NULL,
+  `ws_bill_cdemo_sk` bigint NULL,
+  `ws_bill_hdemo_sk` bigint NULL,
+  `ws_bill_addr_sk` bigint NULL,
+  `ws_ship_customer_sk` bigint NULL,
+  `ws_ship_cdemo_sk` bigint NULL,
+  `ws_ship_hdemo_sk` bigint NULL,
+  `ws_ship_addr_sk` bigint NULL,
+  `ws_web_page_sk` bigint NULL,
+  `ws_web_site_sk` bigint NULL,
+  `ws_ship_mode_sk` bigint NULL,
+  `ws_warehouse_sk` bigint NULL,
+  `ws_promo_sk` bigint NULL,
+  `ws_order_number` bigint NULL,
+  `ws_quantity` int NULL,
+  `ws_wholesale_cost` decimal(7,2) NULL,
+  `ws_list_price` decimal(7,2) NULL,
+  `ws_sales_price` decimal(7,2) NULL,
+  `ws_ext_discount_amt` decimal(7,2) NULL,
+  `ws_ext_sales_price` decimal(7,2) NULL,
+  `ws_ext_wholesale_cost` decimal(7,2) NULL,
+  `ws_ext_list_price` decimal(7,2) NULL,
+  `ws_ext_tax` decimal(7,2) NULL,
+  `ws_coupon_amt` decimal(7,2) NULL,
+  `ws_ext_ship_cost` decimal(7,2) NULL,
+  `ws_net_paid` decimal(7,2) NULL,
+  `ws_net_paid_inc_tax` decimal(7,2) NULL,
+  `ws_net_paid_inc_ship` decimal(7,2) NULL,
+  `ws_net_paid_inc_ship_tax` decimal(7,2) NULL,
+  `ws_net_profit` decimal(7,2) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`ws_sold_date_sk`, `ws_sold_time_sk`, `ws_ship_date_sk`, `ws_item_sk`)
+DISTRIBUTED BY HASH(`ws_item_sk`) BUCKETS 3
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V3",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+); 
+
+drop table if exists item;
+CREATE TABLE `item` (
+  `i_item_sk` bigint NULL,
+  `i_item_id` char(16) NULL,
+  `i_rec_start_date` date NULL,
+  `i_rec_end_date` date NULL,
+  `i_item_desc` varchar(200) NULL,
+  `i_current_price` decimal(7,2) NULL,
+  `i_wholesale_cost` decimal(7,2) NULL,
+  `i_brand_id` int NULL,
+  `i_brand` char(50) NULL,
+  `i_class_id` int NULL,
+  `i_class` char(50) NULL,
+  `i_category_id` int NULL,
+  `i_category` char(50) NULL,
+  `i_manufact_id` int NULL,
+  `i_manufact` char(50) NULL,
+  `i_size` char(20) NULL,
+  `i_formulation` char(20) NULL,
+  `i_color` char(20) NULL,
+  `i_units` char(10) NULL,
+  `i_container` char(10) NULL,
+  `i_manager_id` int NULL,
+  `i_product_name` char(50) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`i_item_sk`, `i_item_id`)
+DISTRIBUTED BY HASH(`i_item_sk`) BUCKETS 3
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1"
+);
+
+INSERT INTO store_sales (
+  ss_sold_date_sk, ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk, ss_hdemo_sk,
+  ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity,
+  ss_wholesale_cost, ss_list_price, ss_sales_price, ss_ext_discount_amt,
+  ss_ext_sales_price, ss_ext_wholesale_cost, ss_ext_list_price, ss_ext_tax,
+  ss_coupon_amt, ss_net_paid, ss_net_paid_inc_tax, ss_net_profit
+) VALUES (
+  20240101, 36000, 1001, 501, 601, 701,
+  801, 901, 10001, 55500001, 2,
+  10.00, 12.00, 11.00, 2.00,
+  22.00, 20.00, 24.00, 1.54,
+  0.00, 22.00, 23.54, 3.54
+);
+
+INSERT INTO date_dim (
+  d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq, d_quarter_seq, d_year,
+  d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq, d_fy_week_seq,
+  d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday,
+  d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq,
+  d_current_day, d_current_week, d_current_month, d_current_quarter, d_current_year
+) VALUES (
+  20240101, '2024-01-01', '2024-01-01', 1, 1, 1, 2024,
+  1, 1, 1, 1, 2024, 1, 1,
+  'MON', 'Q1', 'N', 'N', 'N',
+  1, 31, 20230101, 20231001,
+  'Y', 'Y', 'Y', 'Y', 'Y'
+);
+
+INSERT INTO web_sales (
+  ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk,
+  ws_bill_customer_sk, ws_bill_cdemo_sk, ws_bill_hdemo_sk, ws_bill_addr_sk,
+  ws_ship_customer_sk, ws_ship_cdemo_sk, ws_ship_hdemo_sk, ws_ship_addr_sk,
+  ws_web_page_sk, ws_web_site_sk, ws_ship_mode_sk, ws_warehouse_sk, ws_promo_sk,
+  ws_order_number, ws_quantity, ws_wholesale_cost, ws_list_price, ws_sales_price,
+  ws_ext_discount_amt, ws_ext_sales_price, ws_ext_wholesale_cost, ws_ext_list_price,
+  ws_ext_tax, ws_coupon_amt, ws_ext_ship_cost, ws_net_paid, ws_net_paid_inc_tax,
+  ws_net_paid_inc_ship, ws_net_paid_inc_ship_tax, ws_net_profit
+) VALUES (
+  20240101, 43200, 20240103, 2001,
+  601, 701, 801, 901,
+  602, 702, 802, 902,
+  3001, 4001, 5001, 6001, 7001,
+  8800001, 3, 15.00, 18.00, 16.50,
+  4.50, 49.50, 45.00, 54.00,
+  3.47, 0.00, 5.00, 49.50, 52.97,
+  54.50, 58.00, 7.97
+);
+
+INSERT INTO item (
+  i_item_sk, i_item_id, i_rec_start_date, i_rec_end_date,
+  i_item_desc, i_current_price, i_wholesale_cost,
+  i_brand_id, i_brand, i_class_id, i_class,
+  i_category_id, i_category, i_manufact_id, i_manufact,
+  i_size, i_formulation, i_color, i_units, i_container,
+  i_manager_id, i_product_name
+) VALUES
+  (1001, 'ITEM-0001001', '2024-01-01', NULL,
+   'Sample item 1001', 12.00, 10.00,
+   10, 'BrandA', 101, 'ClassA',
+   201, 'CategoryA', 301, 'ManufactA',
+   'M', 'Std', 'Red', 'EA', 'BOX',
+   1, 'Product 1001'),
+  (2001, 'ITEM-0002001', '2024-01-01', NULL,
+   'Sample item 2001', 18.00, 15.00,
+   11, 'BrandB', 102, 'ClassB',
+   202, 'CategoryB', 302, 'ManufactB',
+   'L', 'Std', 'Blue', 'EA', 'BOX',
+   2, 'Product 2001');
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
index 473a7a04d7eaac..92c61f7c7bacb0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
@@ -186,7 +186,7 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, JobConte
                 //                       ->scan(T2)
                 List newOutputExpressions = new ArrayList<>();
                 Map replaceMap = new HashMap<>();
-                for (Expression x : pushDownContext.getAliasMap().keySet()) {
+                for (Expression x : pushDownContext.getAggFunctions()) {
                     replaceMap.put(x.child(0), pushDownContext.getAliasMap().get(x).toSlot());
                 }
 
diff --git a/test.sql b/test.sql
new file mode 100644
index 00000000000000..850870703e8fa8
--- /dev/null
+++ b/test.sql
@@ -0,0 +1,13 @@
+
+use rqg;
+set eager_aggregation_mode=1;
+select /*+leading({ss dt} ws)*/  dt.d_year 
+        ,min(d_year) brand
+        ,sum(d_year) sum_agg
+    from  store_sales ss
+        join date_dim dt
+        join web_sales ws
+    where dt.d_date_sk = ss_sold_date_sk
+    and ss_item_sk = ws_item_sk
+    group by dt.d_year, ss_hdemo_sk + d_moy
+    having brand is null;