主键字段使用不同数据类型的简单比较

前几天和朋友讨论数据库建模的时候，说起PK使用的数据类型这个话题。我个人是支持使用int，尤其是sequence生成的无意义数字。不过朋友坚持认为GUID更具有唯一性，并且经过测试，两者性能差异不大。我就做了这个测试用数字来说话。

测试环境
VMWare ESXi 5.0
RHEL 5.1 64bit
Oracle 11gR2 64bit

测试数据
方案1采用int类型，方案2采用sys_guid()产生的RAW(16)。
两种方案中除了主键（以及引用它的外键）字段类型不同之外，其他字段都是随机长度、随机内容的字符串，其中一个字段有索引，以保证两种方案具有可比性。

-- Benchmark statements: look up parent rows by a random one-letter prefix on
-- the indexed code column, join them to their children, then aggregate.
-- NOTE(review): each statement calls dbms_random.string on its own, so the
-- two statements are not guaranteed to filter on the same prefix.
select count(1),
       min(c.val1),
       max(c.val2)
  from t_p1 p
 inner join t_c1 c
    on c.pid = p.id
 where p.code like dbms_random.string('U', 1) || '%';

select count(1),
       min(c.val1),
       max(c.val2)
  from t_p2 p
 inner join t_c2 c
    on c.pid = p.id
 where p.code like dbms_random.string('U', 1) || '%';

测试方法和结果
整个测试中，对两种方案在不同环境下的性能进行了记录。具体脚本在后面给出，这里直接列出测试的结果。

1. 表和索引所占空间

	数据量	int 物理空间（M）	guid 物理空间（M）	PCT
P表	100,000	72	80	90%
C表	1,000,000	208	232	90%

	索引类型	int 物理空间（M）	guid 物理空间（M）	PCT
PK_T_P	主键	2	3	67%
PK_T_C	主键	21	38	55%
IDX_FK_T_C_P	外键索引	23	39	59%
IDX_T_P_CODE	普通索引	10	10	100%

2. 单用户sqlplus中consistent gets的数据

	int	guid	PCT
consistent gets	28,290	29,727	95%

3. 单用户下锁资源占用情况。这里主要对比了latch锁。

int 时间	guid 时间	pct 时间	int latch	guid latch	pct latch
25	26	96.1%	55,297	61,201	90.35%

4. 单用户大数据量数据插入时间

	数据量	int 时间	guid 时间	PCT 时间
P表	100,000	72.60	73.32	99%
C表	1,000,000	506.72	569.31	89%

5. 单用户根据索引删除大表中2.5%比例的数据。查看运行时间和消耗的UNDO

int 时间	guid 时间	pct 时间	int undo	guid undo	pct undo
0.85	1.21	70%	10,910,328	12,092,804	90%

6. 并发环境下，查询运行时间百分比。这里使用的是极短时间内后台提交多个JOB的方式模拟并发。

并发数	int 时间	guid 时间	pct 时间
1	24	25	96%
10	46	50	92%
20	36	45	80%
30	49	60	82%
40	48	52	92%
50	49	53	92%

我的结论
从性能角度考虑，GUID方案在上述几种情况下都处于劣势。不过实话实说，用20%左右的性能差异来换取绝对的全局唯一性，在很多场景还是可以接受的。

以下是测试用到的代码

View Code

------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Sample table set 1: parent/child tables whose primary and foreign keys
-- are INT. The drops make the script re-runnable; child first because of
-- the foreign key.
drop table t_c1;
drop table t_p1;

-- Parent table: INT primary key plus an indexed random "code" column.
create table t_p1
(
  id      int,
  code    varchar2(100),
  message varchar2(1000)
);

alter table t_p1
  add constraint pk_t_p1 primary key (id);

create index idx_t_p1_code
  on t_p1 (code);

-- Child table: INT primary key and an indexed INT foreign key to t_p1.
create table t_c1
(
  id   int,
  pid  int,
  val1 varchar2(100),
  val2 varchar2(100),
  val3 varchar2(100)
);

alter table t_c1
  add constraint pk_t_c1 primary key (id);

alter table t_c1
  add constraint fk_t_c1_p1 foreign key (pid) references t_p1 (id);

create index idx_fk_t_c1_p1
  on t_c1 (pid);

-- Fill the parent table: 100,000 rows, id = 1..100000, random-length
-- random strings for code and message.
insert into t_p1
  (id, code, message)
  select rownum,
         dbms_random.string('x', round(dbms_random.value(10, 100))),
         dbms_random.string('x', round(dbms_random.value(100, 1000)))
    from dual
  connect by level <= 100000;
commit;

-- Fill the child table: every parent is paired with a 10-row generator
-- (10 children per parent), and the rows are inserted in random order.
insert into t_c1
  (id, pid, val1, val2, val3)
  select rownum,
         p.id,
         dbms_random.string('x', round(dbms_random.value(10, 100))),
         dbms_random.string('x', round(dbms_random.value(10, 100))),
         dbms_random.string('x', round(dbms_random.value(10, 100)))
    from t_p1 p
   cross join (select level as seq from dual connect by level <= 10) fan_out
   order by dbms_random.value();
commit;


----------------------------------------------------
-- Sample table set 2: same layout as set 1, but the primary and foreign
-- keys are RAW(16) values produced by sys_guid(). The drops make the script
-- re-runnable; child first because of the foreign key.
drop table t_c2;
drop table t_p2;

-- Parent table: RAW(16) primary key plus an indexed random "code" column.
create table t_p2
(
  id      raw(16),
  code    varchar2(100),
  message varchar2(1000)
);

alter table t_p2
  add constraint pk_t_p2 primary key (id);

create index idx_t_p2_code
  on t_p2 (code);

-- Child table: RAW(16) primary key and an indexed RAW(16) foreign key to t_p2.
create table t_c2
(
  id   raw(16),
  pid  raw(16),
  val1 varchar2(100),
  val2 varchar2(100),
  val3 varchar2(100)
);

alter table t_c2
  add constraint pk_t_c2 primary key (id);

alter table t_c2
  add constraint fk_t_c2_p2 foreign key (pid) references t_p2 (id);

create index idx_fk_t_c2_p2
  on t_c2 (pid);

-- Fill the parent table: 100,000 rows keyed by sys_guid(), random-length
-- random strings for code and message.
insert into t_p2
  (id, code, message)
  select sys_guid(),
         dbms_random.string('x', round(dbms_random.value(10, 100))),
         dbms_random.string('x', round(dbms_random.value(100, 1000)))
    from dual
  connect by level <= 100000;
commit;

-- Fill the child table: every parent is paired with a 10-row generator
-- (10 children per parent), and the rows are inserted in random order.
insert into t_c2
  (id, pid, val1, val2, val3)
  select sys_guid(),
         p.id,
         dbms_random.string('x', round(dbms_random.value(10, 100))),
         dbms_random.string('x', round(dbms_random.value(10, 100))),
         dbms_random.string('x', round(dbms_random.value(10, 100)))
    from t_p2 p
   cross join (select level as seq from dual connect by level <= 10) fan_out
   order by dbms_random.value();
commit;

-----------------------------------------------------
-- Timing results table; the run procedure appends one row per call.
--   run_type         : 1 = int tables (t_p1/t_c1), anything else = raw(16) tables
--   concurrent_count : concurrency level the caller reports for the run
--   delta_time       : difference of two dbms_utility.get_time readings
drop table concurrent_stat;

create table concurrent_stat
(
  run_type         int,
  concurrent_count int,
  delta_time       int
);

truncate table concurrent_stat;

-----------------------------------------------------
-- Gather optimizer statistics for the four sample tables; cascade => true
-- asks dbms_stats to cover their indexes as well.
declare
  l_tables constant sys.odcivarchar2list :=
    sys.odcivarchar2list('T_P1', 'T_C1', 'T_P2', 'T_C2');
begin
  for i in 1 .. l_tables.count loop
    dbms_stats.gather_table_stats(ownname => user,
                                  tabname => l_tables(i),
                                  cascade => true);
  end loop;
end;
/

------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
create or replace procedure run
(
  p_type       int,
  p_conc_count int := 1
) as
  -- Runs the benchmark join/aggregate query once and records how long it took.
  --
  --   p_type       : 1 queries the int tables (t_p1/t_c1); any other value
  --                  queries the raw(16) tables (t_p2/t_c2).
  --   p_conc_count : concurrency level to store with the measurement
  --                  (defaults to 1); it is only recorded, not acted upon.
  --
  -- Side effects: inserts one row into concurrent_stat and commits.
  l_started  int;
  l_finished int;
  l_rows     int;
  l_low      varchar2(1000);
  l_high     varchar2(1000);
  l_pattern  varchar2(2);
begin
  -- One random upper-case letter followed by the LIKE wildcard, drawn before
  -- the clock starts so that it is not part of the measured interval.
  l_pattern := dbms_random.string('U', 1) || '%';

  l_started := dbms_utility.get_time;

  case p_type
    when 1 then
      select count(1), min(c.val1), max(c.val2)
        into l_rows, l_low, l_high
        from t_p1 p
       inner join t_c1 c
          on c.pid = p.id
       where p.code like l_pattern;
    else
      select count(1), min(c.val1), max(c.val2)
        into l_rows, l_low, l_high
        from t_p2 p
       inner join t_c2 c
          on c.pid = p.id
       where p.code like l_pattern;
  end case;

  l_finished := dbms_utility.get_time;

  -- The query results themselves are discarded; only the elapsed time is kept.
  insert into concurrent_stat
    (run_type, concurrent_count, delta_time)
  values
    (p_type, p_conc_count, l_finished - l_started);
  commit;
end run;

/

------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Space comparison: list every segment owned by the current user with its
-- size in megabytes, so the table and index footprints can be compared.
select s.segment_name,
       s.segment_type,
       s.bytes / 1024 / 1024 as size_mb
  from dba_segments s
 where s.owner = user
 order by s.segment_name;


-- Ad-hoc comparison of the join/aggregate statement on both table sets
-- (e.g. to compare consistent gets in SQL*Plus).
--
-- The random prefix is drawn once into a SQL*Plus bind variable and reused,
-- so the int and the guid statement filter on the same code prefix. Calling
-- dbms_random.string inside each statement would give the two statements
-- unrelated prefixes. Evaluating the prefix once ahead of the query also
-- matches what the run procedure does.
variable code_prefix varchar2(2)

begin
  :code_prefix := dbms_random.string('U', 1) || '%';
end;
/

-- int-keyed tables
select count(1),
       min(c.val1),
       max(c.val2)
  from t_p1 p
 inner join t_c1 c
    on c.pid = p.id
 where p.code like :code_prefix;

-- raw(16)-keyed tables
select count(1),
       min(c.val1),
       max(c.val2)
  from t_p2 p
 inner join t_c2 c
    on c.pid = p.id
 where p.code like :code_prefix;


------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Compare the session statistics of one int run against one guid run.
-- my_rs is not defined in this script; presumably a runstats-style helper
-- package that snapshots statistics at start/middle/stop -- TODO confirm.
begin
  my_rs.rs_start;
  run(p_type => 1);   -- int tables
  my_rs.rs_middle;
  run(p_type => 2);   -- raw(16) tables
  my_rs.rs_stop(100); -- NOTE(review): meaning of 100 is defined by my_rs, not shown here
end;
/


------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Per concurrency level and run type: number of recorded runs and the
-- rounded average elapsed time.
select s.concurrent_count,
       s.run_type,
       count(*) as cnt,
       round(avg(s.delta_time)) as delta
  from concurrent_stat s
 group by s.concurrent_count,
          s.run_type
 order by s.concurrent_count,
          s.run_type;

-- Ratio of int time to guid time for each concurrency level.
--   delta1  : rounded average elapsed time of run_type 1 (int tables)
--   delta2  : rounded average elapsed time of run_type 2 (raw(16) tables)
--   pct_1_2 : delta1 / delta2, rounded to two decimals; NULL when delta2 is
--             missing or 0 (nullif guards the divisor, so a level whose
--             guid average rounds to 0 no longer raises ORA-01476).
with avg_by_type as (
  -- one row per (concurrency level, run type)
  select s.concurrent_count,
         s.run_type,
         round(avg(s.delta_time)) as delta
    from concurrent_stat s
   group by s.concurrent_count,
            s.run_type
),
pivoted as (
  -- fold the two run types of a level into one row
  select a.concurrent_count,
         sum(case a.run_type when 1 then a.delta end) as delta1,
         sum(case a.run_type when 2 then a.delta end) as delta2
    from avg_by_type a
   group by a.concurrent_count
)
select v.concurrent_count,
       v.delta1,
       v.delta2,
       round(v.delta1 / nullif(v.delta2, 0), 2) as pct_1_2
  from pivoted v
 order by v.concurrent_count;

------------------------------------------------------------------------------------
-- Clear previously recorded timings.
truncate table concurrent_stat;

------------------------------------------------------------------------------------
-- Concurrent run: submit c_job_count scheduler jobs back to back; each job
-- calls run(c_run_type, c_job_count) once. Edit the two constants to pick
-- the table set and the concurrency level.
declare
  c_run_type  constant int := 1;
  c_job_count constant int := 40;
  l_name      varchar2(30);
  l_plsql     varchar2(1000);
begin
  -- The PL/SQL block every job executes.
  l_plsql := '
    begin
      run(' || c_run_type || ',' || c_job_count || ');
    end;';

  for job_no in 1 .. c_job_count loop
    l_name := 'j_' || c_run_type || '_' || job_no;

    -- Created enabled with start_date = now; auto_drop is set, so the job
    -- definition is not kept once the job has completed.
    dbms_scheduler.create_job(
      job_name   => l_name,
      job_type   => 'PLSQL_BLOCK',
      job_action => l_plsql,
      start_date => systimestamp,
      auto_drop  => true,
      enabled    => true
    );
  end loop;

  commit;
end;

/


------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Undo consumed by a bulk delete: read this session's
-- 'undo change vector size' statistic before and after the delete; the
-- difference between the two readings is the figure to compare.
select n.name,
       m.value
  from v$mystat m
 inner join v$statname n
    on n.statistic# = m.statistic#
 where n.name = 'undo change vector size';

-- Delete every child row whose parent code starts with 'A'.
-- NOTE(review): no commit or rollback follows in the visible script.
delete from t_c1 c
 where exists (select 1
                 from t_p1 p
                where p.id = c.pid
                  and p.code like 'A%');

select n.name,
       m.value
  from v$mystat m
 inner join v$statname n
    on n.statistic# = m.statistic#
 where n.name = 'undo change vector size';