presto源码学习笔记 - 执行计划 - 《大数据相关知识》

planFrom

private PlanBuilder planFrom(QuerySpecification node)
{
    RelationPlan relationPlan;
    // 如果From语句存在
    if (node.getFrom().isPresent()) {
        relationPlan = new RelationPlanner(analysis, planSymbolAllocator, idAllocator, lambdaDeclarationToSymbolMap, metadata, session, namedSubPlan, uniqueIdAllocator)
            .process(node.getFrom().get(), null);  // 继续获取From子句进行
    }
    else {
        relationPlan = planImplicitTable();
    }
    return planBuilderFor(relationPlan);
}

process继续处理到Join类
from 子句为Join类型

visitJoin 子句先对join左边子句进行处理，然后对右边子句进行处理

左边子句处理

visitAliasedRelation子句对表继续处理

RelationPlanner::visitTable

 @Override
    protected RelationPlan visitTable(Table node, Void context)
    {
        // 获取表
        // 根据节点Table获取对应的字段信息 
        Scope scope = analysis.getScope(node);
        // 查询from子句是否是with子句
        Query namedQuery = analysis.getNamedQuery(node);
        // 如果包含with子句需要进一步处理
        if (namedQuery != null) {
            RelationPlan subPlan = process(namedQuery, null);
            // Add implicit coercions if view query produces types that don't match the declared output types
            // of the view (e.g., if the underlying tables referenced by the view changed)
            // 如果视图查询生成的类型与视图的声明输出类型不匹配（例如，如果视图引用的基础表发生了变化），则添加隐式强制转换
            Type[] types = scope.getRelationType().getAllFields().stream().map(Field::getType).toArray(Type[]::new);
            RelationPlan withCoercions = addCoercions(subPlan, types);
            if ((!isCTEReuseEnabled(session) || getExecutionPolicy(session).equals("phased"))
                    || (getCteMaxQueueSize(session) < getTaskConcurrency(session) * 2) || !(analysis.getStatement() instanceof Query) || !((Query) analysis.getStatement()).getWith().isPresent()) {
                if (getCteMaxQueueSize(session) < getTaskConcurrency(session) * 2) {
                    LOG.info("Main queue size " + getCteMaxQueueSize(session) + "should be more than 2 times of concurrent task " + getTaskConcurrency(session));
                }
                return new RelationPlan(withCoercions.getRoot(), scope, withCoercions.getFieldMappings());
            }
            Integer commonCTERefNum;
            if (namedSubPlan.containsKey(node.getName())) {
                commonCTERefNum = namedSubPlan.get(node.getName());
            }
            else {
                commonCTERefNum = uniqueIdAllocator.getNextId();
                namedSubPlan.put(node.getName(), commonCTERefNum);
            }
            PlanNode cteNode = new CTEScanNode(idAllocator.getNextId(),
                    withCoercions.getRoot(),
                    withCoercions.getFieldMappings(),
                    Optional.empty(),
                    node.getName().toString(),
                    new HashSet<>(),
                    commonCTERefNum);
            subPlan = new RelationPlan(cteNode, scope, withCoercions.getFieldMappings());
            return subPlan;
        }
        // 获取表句柄
        TableHandle handle = analysis.getTableHandle(node);
        ImmutableList.Builder<Symbol> outputSymbolsBuilder = ImmutableList.builder();
        ImmutableMap.Builder<Symbol, ColumnHandle> columns = ImmutableMap.builder();
        // 遍历字段类型
        for (Field field : scope.getRelationType().getAllFields()) {
            // 字段的名称  字段的类型
            Symbol symbol = planSymbolAllocator.newSymbol(field.getName().get(), field.getType());
            // 缓存到输出列中
            outputSymbolsBuilder.add(symbol);
            columns.put(symbol, analysis.getColumn(field));
        }
        // 输出字段信息
        List<Symbol> outputSymbols = outputSymbolsBuilder.build();
        /// 判断是否是删除语句（DELETE）
        boolean isDeleteTarget = analysis.isDeleteTarget(createQualifiedObjectName(session, node, node.getName()));
        // 创建TableScanNode
        PlanNode root = TableScanNode.newInstance(idAllocator.getNextId(), handle, outputSymbols, columns.build(), ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_DEFAULT, new UUID(0, 0), 0, isDeleteTarget);
        // 从from递归到表名后，生成对应的
        RelationPlan tableScan = new RelationPlan(root, scope, outputSymbols);
        tableScan = addRowFilters(node, tableScan);
        tableScan = addColumnMasks(node, tableScan);
        return tableScan;
    }

addRowFilters的方法

private RelationPlan addRowFilters(Table node, RelationPlan plan)
{
    // 添加行过滤器  初始化计划构造器
    PlanBuilder planBuilder = initializePlanBuilder(plan);
    for (Expression filter : analysis.getRowFilters(node)) {
        planBuilder = subqueryPlanner.handleSubqueries(planBuilder, filter, filter);
        planBuilder = planBuilder.withNewRoot(new FilterNode(
            idAllocator.getNextId(),
            planBuilder.getRoot(),
            castToRowExpression(planBuilder.rewrite(filter))));
    }
    // 跟节点  表与根节点
    return new RelationPlan(planBuilder.getRoot(), plan.getScope(), plan.getFieldMappings());
}

addColumnMasks

 // 添加字段掩码
    private RelationPlan addColumnMasks(Table table, RelationPlan plan)
    {
        Map<String, List<Expression>> columnMasks = analysis.getColumnMasks(table);
        PlanNode root = plan.getRoot();
        List<Symbol> mappings = plan.getFieldMappings();
        // 保存字段映射关系
        TranslationMap translations = new TranslationMap(plan, analysis, lambdaDeclarationToSymbolMap);
        translations.setFieldMappings(mappings);
        PlanBuilder planBuilder = new PlanBuilder(translations, root, analysis.getParameters());
        for (int i = 0; i < plan.getDescriptor().getAllFieldCount(); i++) {
            Field field = plan.getDescriptor().getFieldByIndex(i);
            for (Expression mask : columnMasks.getOrDefault(field.getName().get(), ImmutableList.of())) {
                planBuilder = subqueryPlanner.handleSubqueries(planBuilder, mask, mask);
                Map<Symbol, RowExpression> assignments = new LinkedHashMap<>();
                for (Symbol symbol : root.getOutputSymbols()) {
                    assignments.put(symbol, castToRowExpression(toSymbolReference(symbol)));
                }
                assignments.put(mappings.get(i), castToRowExpression(translations.rewrite(mask)));
                planBuilder = planBuilder.withNewRoot(new ProjectNode(
                        idAllocator.getNextId(),
                        planBuilder.getRoot(),
                        Assignments.copyOf(assignments)));
            }
        }
        return new RelationPlan(planBuilder.getRoot(), plan.getScope(), mappings);
    }

Group分组语句

1.预处理所有标量输入
1）.过滤器表达式需要先投影（group by

分组
1. 重写聚合参数
2. 重写分组列
  1. 这会在考虑复杂表达式之前跟踪分组集（请参阅下面的评论）
  2. 它还用于计算实现 grouping() 所需的描述符
  3. 为了“distinct”的目的，我们需要规范化可能具有不同的列引用，句法形式（例如，“t.a”与“a”）。因此我们需要枚举基于底层的分组集，fieldId 与每个列引用表达式相关联。
  4. 问题在于简单的 group-by 表达式可以是任意表达式（这与 SQL 规范背道而驰）。但是，它们不会影响分组集的数量或“distinct”的行为。我们可以根据 fieldId 计算所有候选分组集，适当地进行重复数据删除，然后将它们与复杂的表达式交叉连接。
  5. 添加复杂的表达式，然后根据计划列具体化分组集
3. 生成 GroupIdNode（多个分组集）或 ProjectNode（单个分组集）
4. 重写聚合
处理之后
1. 重新添加我们在 2.a 中删除的隐式转换
处理并重写所有分组函数