集群因子概念


SQL> select count(distinct dbms_rowid.rowid_block_number(rowid)) from test where object_id<90;

COUNT(DISTINCTDBMS_ROWID.ROWID_BLOCK_NUMBER(ROWID))
---------------------------------------------------
						  1

SQL> select count(distinct dbms_rowid.rowid_block_number(rowid)) from test where object_id<91;

COUNT(DISTINCTDBMS_ROWID.ROWID_BLOCK_NUMBER(ROWID))
---------------------------------------------------
						  2


 

 
 
测试:
create table t1 as select trunc((rownum-1)/100) id,
                  rpad(rownum,100) t_pad
                  from dba_source
              where rownum<100000;
 
create index t1_idx1 on t1(id);
 
select a.column_name,
          b.num_rows,
           a.num_distinct Cardinality,
           round(a.num_distinct / b.num_rows * 100, 2) selectivity,
          a.histogram,
          a.num_buckets
      from dba_tab_col_statistics a, dba_tables b
     where a.owner = b.owner
       and a.table_name = b.table_name
      and a.owner = 'TEST'
      and a.table_name = 'T1';

      
      select owner, table_name name, object_type, stale_stats, last_analyzed
  from dba_tab_statistics
 where table_name in ('T1')
   and owner = 'TEST'
   and (stale_stats = 'YES' or last_analyzed is null);


BEGIN
  DBMS_STATS.GATHER_TABLE_STATS(ownname          => 'TEST',
                                tabname          => 'T1',
                                estimate_percent => 100,
                                method_opt       => 'for all columns size skewonly',
                                no_invalidate    => FALSE,
                                degree           => 8,
                                cascade          => TRUE);
END;
 
 
 
SQL> select index_name,clustering_factor from user_indexes where table_name='T1';  
 
INDEX_NAME		       CLUSTERING_FACTOR
------------------------------ -----------------
T1_IDX1 				    1536
 
 
SQL>  select count(distinct dbms_rowid.rowid_block_number(rowid)) from t1;
 
COUNT(DISTINCTDBMS_ROWID.ROWID_BLOCK_NUMBER(ROWID))
---------------------------------------------------
					       1536


SQL> select count(*) from t1;

  COUNT(*)
----------
     99999



如果clustering factor 接近block 数，说明表的存储和索引存储排序接近，也就是说表中的记录很有序，这样在做index range scan 的时候能，

读取少量的data block 就能得到我们想要的数据，代价比较小。


如果clustering factor 接近表记录数，说明表的存储和索引排序差异很大，

在做index range scan 的时候，会额外读取多个block，因为表记录分散，代价较高。


 
SQL> select * from (select id,count(*) from t1 group by id order by id,count(*)) where rownum<10;
 
	ID   COUNT(*)
---------- ----------
	 0	  100
	 1	  100
	 2	  100
	 3	  100
	 4	  100
	 5	  100
	 6	  100
	 7	  100
	 8	  100
 
9 rows selected.
 
 

select count(distinct dbms_rowid.rowid_block_number(rowid)) from T1 where id<11;

SQL>  select * from t1 where id=1;

已选择100行。


执行计划
----------------------------------------------------------
Plan hash value: 2623418078

---------------------------------------------------------------------------------------
| Id  | Operation		    | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT	    |	      |   100 | 10500 |     3	(0)| 00:00:01 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T1      |   100 | 10500 |     3	(0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN	    | T1_IDX1 |   100 |       |     1	(0)| 00:00:01 |
---------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("ID"=1)


统计信息
----------------------------------------------------------
	  1  recursive calls
	  0  db block gets
	 19  consistent gets
	  5  physical reads
	  0  redo size
      12650  bytes sent via SQL*Net to client
	590  bytes received via SQL*Net from client
	  8  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
	100  rows processed

 
集群因子接近表的块数,索引效率较高。
 
 
 
2)
 
create table t2 as select mod(rownum,100) id,
                  rpad(rownum,100) t_pad
                  from dba_source
              where rownum<100000;
     create index t1_idx2 on t2(id);
         BEGIN
  DBMS_STATS.GATHER_TABLE_STATS(ownname          => 'TEST',
                                tabname          => 'T2',
                                estimate_percent => 100,
                                method_opt       => 'for all columns size skewonly',
                                no_invalidate    => FALSE,
                                degree           => 8,
                                cascade          => TRUE);
END;
 
SQL> select index_name,clustering_factor from user_indexes where table_name='T2';
 
INDEX_NAME		       CLUSTERING_FACTOR
------------------------------ -----------------
T1_IDX2 				   99999
 
SQL> select count(*) from t2;
 
  COUNT(*)
----------
     99999
 
SQL> select count(*) from t2 where id=1;
 
  COUNT(*)
----------
      1000
 
SQL> select 1000/99999 * 100 from dual;
 
1000/99999*100
--------------
       1.00001
 
这个数据比例应该走索引啊！
 
 
 
SQL> select * from t2 where id=1;
 
1000 rows selected.
 
 
SQL> set pagesize 200
SQL> set autot trace
SQL> select * from t2 where id=1;
 
1000 rows selected.
 
 
Execution Plan
----------------------------------------------------------
Plan hash value: 1513984157
 
--------------------------------------------------------------------------
| Id  | Operation	  | Name | Rows  | Bytes | Cost (%CPU)| Time	 |
--------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |	 |  1000 |   101K|   423   (1)| 00:00:06 |
|*  1 |  TABLE ACCESS FULL| T2	 |  1000 |   101K|   423   (1)| 00:00:06 |
--------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   1 - filter("ID"=1)
 
 
Statistics
----------------------------------------------------------
	  0  recursive calls
	  0  db block gets
       1588  consistent gets
       1517  physical reads
	  0  redo size
     118709  bytes sent via SQL*Net to client
       1246  bytes received via SQL*Net from client
	 68  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
       1000  rows processed
 
 
确走全表扫描了,逻辑读为1588
 
强制走索引的逻辑读:
 
SQL> select /*+ index(t2 T1_IDX2)*/   * from t2 where id=1;
 
1000 rows selected.
 
 
Execution Plan
----------------------------------------------------------
Plan hash value: 1418564783
 
---------------------------------------------------------------------------------------
| Id  | Operation		    | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT	    |	      |  1000 |   101K|  1002	(0)| 00:00:13 |
|   1 |  TABLE ACCESS BY INDEX ROWID| T2      |  1000 |   101K|  1002	(0)| 00:00:13 |
|*  2 |   INDEX RANGE SCAN	    | T1_IDX2 |  1000 |       |     2	(0)| 00:00:01 |
---------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("ID"=1)
 
 
Statistics
----------------------------------------------------------
	  0  recursive calls
	  0  db block gets
       1071  consistent gets
       1004  physical reads
	  0  redo size
     121909  bytes sent via SQL*Net to client
       1246  bytes received via SQL*Net from client
	 68  SQL*Net roundtrips to/from client
	  0  sorts (memory)
	  0  sorts (disk)
       1000  rows processed