PostgreSQL 语法树分析之前需要了解到知识

为了少走弯路,有必要了解如下概念:

http://www.postgresql.org/docs/9.2/static/querytree.html

The rule system is located between the parser and the planner. It takes the output of the parser, one query tree, and the user-defined rewrite rules, which are also query trees with some extra information, and creates zero or more query trees as result. So its input and output are always things the parser itself could have produced and thus, anything it sees is basically representable as an SQL statement.

parser -- rule sytem -- planner

难道说,PostgreSQL的 执行计划,是基于规则的?

parser 输出的 是  query tree,加入了 用户定义的 rewrite rule后,planner 输出的是 一个或多个 query tree。

Now what is a query tree? It is an internal representation of an SQL statement where the single parts that it is built from are stored separately. These query trees can be shown in the server log if you set the configuration parameters debug_print_parse, debug_print_rewritten, or debug_print_plan. The rule actions are also stored as query trees, in the system catalog pg_rewrite. They are not formatted like the log output, but they contain exactly the same information.

可以通过  debug_print_parse 、debug_print_rewritten、debug_print_plan 来从 服务器Log 输出 plan。

我如果设置了  debug_print_plan后,如下的SQL文,可以得到如下的log:

"WITH test(x) AS (SELECT 1 UNION SELECT 2)
SELECT *  FROM test;
[postgres@lex pg_log]$ cat postgresql-2013-06-06_095516.csv
2013-06-06 09:55:16.125 CST,,,2875,,51afec04.b3b,1,,2013-06-06 09:55:16 CST,,0,LOG,00000,"database system was shut down at 2013-06-06 09:47:45 CST",,,,,,,,,""
2013-06-06 09:55:16.187 CST,,,2879,,51afec04.b3f,1,,2013-06-06 09:55:16 CST,,0,LOG,00000,"autovacuum launcher started",,,,,,,,,""
2013-06-06 09:55:16.188 CST,,,2873,,51afec03.b39,1,,2013-06-06 09:55:15 CST,,0,LOG,00000,"database system is ready to accept connections",,,,,,,,,""
2013-06-06 09:56:05.261 CST,"postgres","postgres",2938,"[local]",51afec12.b7a,1,"SELECT",2013-06-06 09:55:30 CST,2/2,0,LOG,00000,"plan:","   {PLANNEDSTMT 
   :commandType 1 
   :queryId 0 
   :hasReturning false 
   :hasModifyingCTE false 
   :canSetTag true 
   :transientPlan false 
   :planTree 
      {CTESCAN 
      :startup_cost 0.06 
      :total_cost 0.10 
      :plan_rows 2 
      :plan_width 4 
      :targetlist (
         {TARGETENTRY 
         :expr 
            {VAR 
            :varno 1 
            :varattno 1 
            :vartype 23 
            :vartypmod -1 
            :varcollid 0 
            :varlevelsup 0 
            :varnoold 1 
            :varoattno 1 
            :location -1
            }
         :resno 1 
         :resname x 
         :ressortgroupref 0 
         :resorigtbl 0 
         :resorigcol 0 
         :resjunk false
         }
      )
      :qual <> 
      :lefttree <> 
      :righttree <> 
      :initPlan (
         {SUBPLAN 
         :subLinkType 6 
         :testexpr <> 
         :paramIds <> 
         :plan_id 1 
         :plan_name CTE\ test 
         :firstColType 23 
         :firstColTypmod -1 
         :firstColCollation 0 
         :useHashTable false 
         :unknownEqFalse false 
         :setParam (i 0)
         :parParam <> 
         :args <> 
         :startup_cost 0.00 
         :per_call_cost 0.06
         }
      )
      :extParam (b)
      :allParam (b 0)
      :scanrelid 1 
      :ctePlanId 1 
      :cteParam 0
      }
   :rtable (
      {RTE 
      :alias <> 
      :eref 
         {ALIAS 
         :aliasname test 
         :colnames (""x"")
         }
      :rtekind 5 
      :ctename test 
      :ctelevelsup 0 
      :self_reference false 
      :ctecoltypes <> 
      :ctecoltypmods <> 
      :ctecolcollations <> 
      :inh false 
      :inFromCl true 
      :requiredPerms 2 
      :checkAsUser 0 
      :selectedCols (b)
      :modifiedCols (b)
      }
      {RTE 
      :alias 
         {ALIAS 
         :aliasname *SELECT*\ 1 
         :colnames <>
         }
      :eref 
         {ALIAS 
         :aliasname *SELECT*\ 1 
         :colnames (""?column?"")
         }
      :rtekind 1 
      :subquery <> 
      :security_barrier false 
      :inh false 
      :inFromCl false 
      :requiredPerms 0 
      :checkAsUser 0 
      :selectedCols (b)
      :modifiedCols (b)
      }
      {RTE 
      :alias 
         {ALIAS 
         :aliasname *SELECT*\ 2 
         :colnames <>
         }
      :eref 
         {ALIAS 
         :aliasname *SELECT*\ 2 
         :colnames (""?column?"")
         }
      :rtekind 1 
      :subquery <> 
      :security_barrier false 
      :inh false 
      :inFromCl false 
      :requiredPerms 0 
      :checkAsUser 0 
      :selectedCols (b)
      :modifiedCols (b)
      }
   )
   :resultRelations <> 
   :utilityStmt <> 
   :subplans (
      {UNIQUE 
      :startup_cost 0.05 
      :total_cost 0.06 
      :plan_rows 2 
      :plan_width 0 
      :targetlist (
         {TARGETENTRY 
         :expr 
            {VAR 
            :varno 65001 
            :varattno 1 
            :vartype 23 
            :vartypmod -1 
            :varcollid 0 
            :varlevelsup 0 
            :varnoold 1 
            :varoattno 1 
            :location -1
            }
         :resno 1 
         :resname ?column? 
         :ressortgroupref 1 
         :resorigtbl 0 
         :resorigcol 0 
         :resjunk false
         }
      )
      :qual <> 
      :lefttree 
         {SORT 
         :startup_cost 0.05 
         :total_cost 0.06 
         :plan_rows 2 
         :plan_width 0 
         :targetlist (
            {TARGETENTRY 
            :expr 
               {VAR 
               :varno 65001 
               :varattno 1 
               :vartype 23 
               :vartypmod -1 
               :varcollid 0 
               :varlevelsup 0 
               :varnoold 1 
               :varoattno 1 
               :location -1
               }
            :resno 1 
            :resname ?column? 
            :ressortgroupref 1 
            :resorigtbl 0 
            :resorigcol 0 
            :resjunk false
            }
         )
         :qual <> 
         :lefttree 
            {APPEND 
            :startup_cost 0.00 
            :total_cost 0.04 
            :plan_rows 2 
            :plan_width 0 
            :targetlist (
               {TARGETENTRY 
               :expr 
                  {VAR 
                  :varno 65001 
                  :varattno 1 
                  :vartype 23 
                  :vartypmod -1 
                  :varcollid 0 
                  :varlevelsup 0 
                  :varnoold 1 
                  :varoattno 1 
                  :location -1
                  }
               :resno 1 
               :resname ?column? 
               :ressortgroupref 1 
               :resorigtbl 0 
               :resorigcol 0 
               :resjunk false
               }
            )
            :qual <> 
            :lefttree <> 
            :righttree <> 
            :initPlan <> 
            :extParam (b)
            :allParam (b)
            :appendplans (
               {RESULT 
               :startup_cost 0.00 
               :total_cost 0.01 
               :plan_rows 1 
               :plan_width 0 
               :targetlist (
                  {TARGETENTRY 
                  :expr 
                     {CONST 
                     :consttype 23 
                     :consttypmod -1 
                     :constcollid 0 
                     :constlen 4 
                     :constbyval true 
                     :constisnull false 
                     :location 24 
                     :constvalue 4 [ 1 0 0 0 0 0 0 0 ]
                     }
                  :resno 1 
                  :resname ?column? 
                  :ressortgroupref 0 
                  :resorigtbl 0 
                  :resorigcol 0 
                  :resjunk false
                  }
               )
               :qual <> 
               :lefttree <> 
               :righttree <> 
               :initPlan <> 
               :extParam (b)
               :allParam (b)
               :resconstantqual <>
               }
               {RESULT 
               :startup_cost 0.00 
               :total_cost 0.01 
               :plan_rows 1 
               :plan_width 0 
               :targetlist (
                  {TARGETENTRY 
                  :expr 
                     {CONST 
                     :consttype 23 
                     :consttypmod -1 
                     :constcollid 0 
                     :constlen 4 
                     :constbyval true 
                     :constisnull false 
                     :location 39 
                     :constvalue 4 [ 2 0 0 0 0 0 0 0 ]
                     }
                  :resno 1 
                  :resname ?column? 
                  :ressortgroupref 0 
                  :resorigtbl 0 
                  :resorigcol 0 
                  :resjunk false
                  }
               )
               :qual <> 
               :lefttree <> 
               :righttree <> 
               :initPlan <> 
               :extParam (b)
               :allParam (b)
               :resconstantqual <>
               }
            )
            }
         :righttree <> 
         :initPlan <> 
         :extParam (b)
         :allParam (b)
         :numCols 1 
         :sortColIdx 1 
         :sortOperators 97 
         :collations 0 
         :nullsFirst false
         }
      :righttree <> 
      :initPlan <> 
      :extParam (b)
      :allParam (b)
      :numCols 1 
      :uniqColIdx 1 
      :uniqOperators 96
      }
   )
   :rewindPlanIDs (b)
   :rowMarks <> 
   :relationOids <> 
   :invalItems <> 
   :nParamExec 1
   }
",,,,,"WITH test(x) AS (SELECT 1 UNION SELECT 2)
SELECT *  FROM test;",,,"psql"
[postgres@lex pg_log]$ 

重要的是,要了解以下几个概念:

When reading the SQL representations of the query trees in this chapter it is necessary to be able to identify the parts the statement is broken into when it is in the query tree structure. The parts of a query tree are

 command type: 命令的类型

the command type
This is a simple value telling which command (SELECT, INSERT, UPDATE, DELETE) produced the query tree.

range table: 范围表 ,在SQL 的 select 而言,就是 from 后面跟的表名称的列表

the range table
The range table is a list of relations that are used in the query. In a SELECT statement these are the relations given after the FROM key word.

Every range table entry identifies a table or view and tells by which name it is called in the other parts of the query. In the query tree, the range table entries are referenced by number rather than by name, so here it doesn't matter if there are duplicate names as it would in an SQL statement. This can happen after the range tables of rules have been merged in. The examples in this chapter will not have this situation.

the result relation:结果表,对 SQL的 select 而言,可以无视

the result relation
This is an index into the range table that identifies the relation where the results of the query go.

SELECT queries don't have a result relation. (The special case of SELECT INTO is mostly identical to CREATE TABLE followed by INSERT ... SELECT, and is not discussed separately here.)

For INSERT, UPDATE, and DELETE commands, the result relation is the table (or view!) where the changes are to take effect.

target list: 目标列表,对SQL的 select 而言,就是  select 和 from 之间的东西

the target list
The target list is a list of expressions that define the result of the query. In the case of a SELECT, these expressions are the ones that build the final output of the query. They correspond to the expressions between the key words SELECT and FROM. (* is just an abbreviation for all the column names of a relation. It is expanded by the parser into the individual columns, so the rule system never sees it.)

DELETE commands don't need a normal target list because they don't produce any result. Instead, the rule system adds a special CTID entry to the empty target list, to allow the executor to find the row to be deleted. (CTID is added when the result relation is an ordinary table. If it is a view, a whole-row variable is added instead, as described in Section 37.2.4.)

For INSERT commands, the target list describes the new rows that should go into the result relation. It consists of the expressions in the VALUES clause or the ones from the SELECT clause in INSERT ... SELECT. The first step of the rewrite process adds target list entries for any columns that were not assigned to by the original command but have defaults. Any remaining columns (with neither a given value nor a default) will be filled in by the planner with a constant null expression.

For UPDATE commands, the target list describes the new rows that should replace the old ones. In the rule system, it contains just the expressions from the SET column = expression part of the command. The planner will handle missing columns by inserting expressions that copy the values from the old row into the new one. Just as for DELETE, the rule system adds a CTID or whole-row variable so that the executor can identify the old row to be updated.

Every entry in the target list contains an expression that can be a constant value, a variable pointing to a column of one of the relations in the range table, a parameter, or an expression tree made of function calls, constants, variables, operators, etc.

the qualification: 对 sql 而言,就是 where 条件

the qualification
The query's qualification is an expression much like one of those contained in the target list entries. The result value of this expression is a Boolean that tells whether the operation (INSERT, UPDATE, DELETE, or SELECT) for the final result row should be executed or not. It corresponds to the WHERE clause of an SQL statement.

the join tree: 对简单查询而言,和 rangetable 类似。对复杂的join 而言,就需要 join tree了。

the join tree
The query's join tree shows the structure of the FROM clause. For a simple query like SELECT ... FROM a, b, c, the join tree is just a list of the FROM items, because we are allowed to join them in any order. But when JOIN expressions, particularly outer joins, are used, we have to join in the order shown by the joins. In that case, the join tree shows the structure of the JOIN expressions. The restrictions associated with particular JOIN clauses (from ON or USING expressions) are stored as qualification expressions attached to those join-tree nodes. It turns out to be convenient to store the top-level WHERE expression as a qualification attached to the top-level join-tree item, too. So really the join tree represents both the FROM and WHERE clauses of a SELECT.

 再来看看比较简单的SQL 查询得到的 plan tree:

2013-06-06 10:45:17.784 CST,,,2896,,51aff7bd.b50,1,,2013-06-06 10:45:17 CST,,0,LOG,00000,"database system is ready to accept connections",,,,,,,,,""
2013-06-06 10:45:48.335 CST,"postgres","postgres",2961,"[local]",51aff7d1.b91,1,"SELECT",2013-06-06 10:45:37 CST,2/2,0,LOG,00000,"plan:","   {PLANNEDSTMT 
   :commandType 1 
   :queryId 0 
   :hasReturning false 
   :hasModifyingCTE false 
   :canSetTag true 
   :transientPlan false 
   :planTree 
      {SEQSCAN 
      :startup_cost 0.00 
      :total_cost 36.75 
      :plan_rows 713 
      :plan_width 4 
      :targetlist (
         {TARGETENTRY 
         :expr 
            {VAR 
            :varno 1 
            :varattno 1 
            :vartype 23 
            :vartypmod -1 
            :varcollid 0 
            :varlevelsup 0 
            :varnoold 1 
            :varoattno 1 
            :location 7
            }
         :resno 1 
         :resname id 
         :ressortgroupref 0 
         :resorigtbl 16393 
         :resorigcol 1 
         :resjunk false
         }
      )
      :qual (
         {OPEXPR 
         :opno 521 
         :opfuncid 147 
         :opresulttype 16 
         :opretset false 
         :opcollid 0 
         :inputcollid 0 
         :args (
            {VAR 
            :varno 1 
            :varattno 1 
            :vartype 23 
            :vartypmod -1 
            :varcollid 0 
            :varlevelsup 0 
            :varnoold 1 
            :varoattno 1 
            :location 27
            }
            {CONST 
            :consttype 23 
            :consttypmod -1 
            :constcollid 0 
            :constlen 4 
            :constbyval true 
            :constisnull false 
            :location 30 
            :constvalue 4 [ 3 0 0 0 0 0 0 0 ]
            }
         )
         :location 29
         }
      )
      :lefttree <> 
      :righttree <> 
      :initPlan <> 
      :extParam (b)
      :allParam (b)
      :scanrelid 1
      }
   :rtable (
      {RTE 
      :alias <> 
      :eref 
         {ALIAS 
         :aliasname tst04 
         :colnames (""id"" ""val"")
         }
      :rtekind 0 
      :relid 16393 
      :relkind r 
      :inh false 
      :inFromCl true 
      :requiredPerms 2 
      :checkAsUser 0 
      :selectedCols (b 9)
      :modifiedCols (b)
      }
   )
   :resultRelations <> 
   :utilityStmt <> 
   :subplans <> 
   :rewindPlanIDs (b)
   :rowMarks <> 
   :relationOids (o 16393)
   :invalItems <> 
   :nParamExec 0
   }
",,,,,"select id from tst04 where id>3;",,,"psql"
[postgres@lex pg_log]$ 
原文地址:https://www.cnblogs.com/gaojian/p/3120509.html