Oracle 直方图实例测试

--创建表
SQL> create table tab (a number, b number); 

Table created.

--插入数据
SQL> begin                                    
  2   for i in 1..10000 loop
  3     insert into tab values (i, i);
  4   end loop;
  5   commit;
  6  end;
  7  /

PL/SQL procedure successfully completed.

--更新部分数据
SQL> update tab set b=5 where b between 6 and 9995;

9990 rows updated.

--提交
SQL> commit;

Commit complete.

--创建索引
SQL> create index ix_tab_b on tab(b);

Index created.

--数据分布
SQL> SELECT b,count(*) from tab group by b  order by 1;

         B   COUNT(*)
---------- ----------
         1          1
         2          1
         3          1
         4          1
         5       9991
      9996          1
      9997          1
      9998          1
      9999          1
     10000          1

10 rows selected.

SQL> 


--禁止产生直方图(size 1)
SQL> BEGIN
  2    DBMS_STATS.GATHER_TABLE_STATS(OWNNAME    => 'NC50',
  3                                  TABNAME    => 'TAB',
  4                                  CASCADE    => TRUE,
  5                                  METHOD_OPT => 'FOR  COLUMNS B SIZE 1 ');
  6  END;
  7  /

PL/SQL procedure successfully completed.

SQL>

--查看视图USER_TAB_HISTOGRAMS
SQL> col COLUMN_NAME format a20
SQL> col TABLE_NAME format a15
SQL> SELECT table_name,column_name,endpoint_number,endpoint_value FROM USER_TAB_HISTOGRAMS WHERE TABLE_NAME='TAB';

TABLE_NAME      COLUMN_NAME          ENDPOINT_NUMBER ENDPOINT_VALUE
--------------- -------------------- --------------- --------------
TAB             B                                  0              1
TAB             B                                  1          10000

SQL>

--注:ENDPOINT_NUMBER列值是bucket端点的编号。现在只有0、1两个端点(即只有一个bucket,仅记录了列的最小值1和最大值10000),说明没有产生直方图信息。




--在没有直方图的情况下,优化器假定数据均匀分布(估算行数=总行数/唯一值个数=10000/10=1000),因此在B列上进行等值查询的时候,都是索引范围扫描(即使实际返回总数据量的约99.9%)


SQL> set autotrace traceonly explain
SQL> set line 180
SQL> select * from tab where b=1;

Execution Plan
----------------------------------------------------------
Plan hash value: 439197569

----------------------------------------------------------------------------------------
| Id  | Operation                   | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
----------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT            |          |  1000 |  7000 |     4   (0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| TAB      |  1000 |  7000 |     4   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN          | IX_TAB_B |  1000 |       |     2   (0)| 00:00:01 |
----------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("B"=1)

SQL> 


SQL> select * from tab where b=5;---返回9991条数据,约占总数据量的99.9%

Execution Plan
----------------------------------------------------------
Plan hash value: 439197569

----------------------------------------------------------------------------------------
| Id  | Operation                   | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
----------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT            |          |  1000 |  7000 |     4   (0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| TAB      |  1000 |  7000 |     4   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN          | IX_TAB_B |  1000 |       |     2   (0)| 00:00:01 |
----------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("B"=5)

SQL> 


--收集直方图信息。看看是什么效果。由于列B唯一值的个数没有超过254,因此产生的是等频直方图。

SQL> BEGIN
  2    DBMS_STATS.GATHER_TABLE_STATS(OWNNAME    => 'NC50',
  3                                  TABNAME    => 'TAB',
  4                                  CASCADE    => TRUE,
  5                                  METHOD_OPT => 'FOR  COLUMNS B SIZE AUTO ');
  6  END;
  7  /

PL/SQL procedure successfully completed.

SQL> 

--在B=1时候采用索引扫描

SQL> set autotrace traceonly explain
SQL> select * from tab where b=1;

Execution Plan
----------------------------------------------------------
Plan hash value: 439197569

----------------------------------------------------------------------------------------
| Id  | Operation                   | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
----------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT            |          |     1 |     7 |     2   (0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| TAB      |     1 |     7 |     2   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN          | IX_TAB_B |     1 |       |     1   (0)| 00:00:01 |
----------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("B"=1)


--在B=5时候,已经采用全表扫描了,说明直方图起了作用
SQL> select * from tab where b=5;

Execution Plan
----------------------------------------------------------
Plan hash value: 1995730731

--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  9991 | 69937 |     9   (0)| 00:00:01 |
|*  1 |  TABLE ACCESS FULL| TAB  |  9991 | 69937 |     9   (0)| 00:00:01 |
--------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   1 - filter("B"=5)

SQL>


--查看此时的直方图信息:
SQL> set autotrace off;
SQL> col TABLE_NAME format a10
SQL> col COLUMN_NAME format a20
SQL> SELECT TABLE_NAME, COLUMN_NAME, ENDPOINT_NUMBER, ENDPOINT_VALUE 
  2  FROM USER_TAB_HISTOGRAMS
  3  WHERE TABLE_NAME = 'TAB';

TABLE_NAME COLUMN_NAME          ENDPOINT_NUMBER ENDPOINT_VALUE
---------- -------------------- --------------- --------------
TAB        B                                  1              1
TAB        B                                  2              2
TAB        B                                  3              3
TAB        B                                  4              4
TAB        B                               9995              5
TAB        B                               9996           9996
TAB        B                               9997           9997
TAB        B                               9998           9998
TAB        B                               9999           9999
TAB        B                              10000          10000

10 rows selected.

SQL> 

--其中ENDPOINT_NUMBER是累计值,ENDPOINT_NUMBER(N)-ENDPOINT_NUMBER(N-1)=当前桶的数据个数,例如值5所在的桶:9995-4=9991
--ENDPOINT_VALUE是列的值。



--再看看等高直方图的情况,由于列B有10个唯一值,通过size 8(桶数小于唯一值个数)可以强制ORACLE使用等高直方图
SQL> BEGIN
  2    DBMS_STATS.GATHER_TABLE_STATS(OWNNAME    => 'NC50',
  3                                  TABNAME    => 'TAB',
  4                                  CASCADE    => TRUE,
  5                                  METHOD_OPT => 'FOR  COLUMNS B SIZE 8 ');
  6  END;
  7  /

PL/SQL procedure successfully completed.

SQL> SELECT TABLE_NAME, COLUMN_NAME, ENDPOINT_NUMBER, ENDPOINT_VALUE FROM USER_TAB_HISTOGRAMS
  2  WHERE TABLE_NAME = 'TAB' ;  
WHERE TABLE_NAME = 'TAB'
                        *
ERROR at line 2:
ORA-00933: SQL command not properly ended


SQL> SELECT TABLE_NAME, COLUMN_NAME, ENDPOINT_NUMBER, ENDPOINT_VALUE 
  2  FROM USER_TAB_HISTOGRAMS
  3  WHERE TABLE_NAME = 'TAB';

TABLE_NAME COLUMN_NAME          ENDPOINT_NUMBER ENDPOINT_VALUE
---------- -------------------- --------------- --------------
TAB        B                                  0              1
TAB        B                                  7              5
TAB        B                                  8          10000

SQL>

--从查询结果惊奇地发现只显示了三行:端点0、7、8。这是因为桶1~7的端点值都是5,ORACLE把端点值相同的相邻桶合并存储,只保留最后一个桶号7

--说明:在等高直方图中,ENDPOINT_NUMBER代表桶号,这一点与等频直方图不同。

SQL> set autotrace traceonly explain;
SQL> select * from tab where b=5;

Execution Plan
----------------------------------------------------------
Plan hash value: 1995730731

--------------------------------------------------------------------------
| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |  8750 | 61250 |     9   (0)| 00:00:01 |
|*  1 |  TABLE ACCESS FULL| TAB  |  8750 | 61250 |     9   (0)| 00:00:01 |
--------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   1 - filter("B"=5)

SQL> 


--发现执行计划的ROWS部分,ORACLE计算出来的cardinality不是特别精确:值5占了8个桶中的7个,估算值=10000*7/8=8750,而9991才是精确值。等频直方图可以精确到9991,因此可以说等频直方图比等高直方图稳定、精确。可是现实中很多时候,列的唯一值个数是超过254的,这时只能使用等高直方图了。
原文地址:https://www.cnblogs.com/polestar/p/4331009.html