postgresql插入大量模拟数据总结

1.准备数据:

创建两张表count_group和person_face_flow

-- DROP TABLE public.count_group;

CREATE TABLE public.count_group
(
    id character varying COLLATE pg_catalog."default" NOT NULL,
    name character varying(64) COLLATE pg_catalog."default" NOT NULL,
    type integer NOT NULL,
    is_delete integer NOT NULL DEFAULT 0,
    area integer,
    create_time timestamp with time zone,
    update_time timestamp with time zone,
    region_id character varying(48) COLLATE pg_catalog."default" NOT NULL,
    CONSTRAINT count_group_pkey PRIMARY KEY (id)
)

-- DROP TABLE public.person_face_flow;

CREATE TABLE public.person_face_flow
(
    id character varying(36) COLLATE pg_catalog."default" NOT NULL,
    group_id character varying(36) COLLATE pg_catalog."default" NOT NULL,
    stat_time timestamp without time zone,
    create_time timestamp without time zone NOT NULL,
    update_time timestamp without time zone NOT NULL,
    in_number integer,
    out_number integer,
    gender character varying(20) COLLATE pg_catalog."default",
    age_type character varying(20) COLLATE pg_catalog."default",
    CONSTRAINT person_face_flow_pkey PRIMARY KEY (id)
)

2.生成模拟数据的脚本:

  1. count_group表插入400条模拟数据
create extension IF NOT EXISTS "uuid-ossp";
DO $$

DECLARE formatStr text := 'INSERT INTO count_group (id, name, type, is_delete, 
area,region_id,create_time,update_time)values
((SELECT uuid_generate_v4()), ''统计组_''||''%s'',''%s'',0,''%s'',uuid_generate_v4(),now(),now())';
  DECLARE exe_sql text;

  DECLARE random_base INTEGER := 100;
  DECLARE i int :=1;
  DECLARE gender_array text[] := '{0,1,2,3}';
  DECLARE gender text;

BEGIN
  WHILE i<101 LOOP

    FOREACH gender IN Array gender_array LOOP
        exe_sql = format(formatStr,i,gender,random_base);
        execute exe_sql;
      END LOOP;

    i:=i+1;
  END LOOP;
END
$$;
  1. person_face_flow表插入模拟数据

create extension IF NOT EXISTS "uuid-ossp"; 
DO $$

DECLARE formatStr text := 'INSERT INTO person_face_flow (id, group_id, stat_time, create_time,
update_time,in_number,out_number,gender,age_type) 
SELECT uuid_generate_v4(),''%s'',generate_series(''%s''::TIMESTAMPtz,''%s''::TIMESTAMPtz,''1 day'')
,now(),now(),''%s'',''%s'',''%s'',''%s'';';
DECLARE exe_sql text;

DECLARE start_time TIMESTAMPTZ := '2019-01-01' ;
DECLARE end_time TIMESTAMPTZ := '2019-02-01';
DECLARE temp_end TIMESTAMPTZ;
DECLARE is_random BOOLEAN := true;
DECLARE random_base INTEGER := 100;
DECLARE month_time TIMESTAMPTZ;
DECLARE month_time_end TIMESTAMPTZ;
DECLARE record_item RECORD;

DECLARE gender_array text[] := '{male,female,unknow}';
DECLARE gender text;
DECLARE age_array text[] := '{child,youth,old}';
DECLARE age text;

BEGIN
	FOR record_item IN (SELECT id FROM count_group where is_delete=0) LOOP
        FOREACH gender IN Array gender_array LOOP
            FOREACH age IN Array age_array LOOP
                month_time = start_time;
                month_time_end = end_time;
                while month_time < month_time_end LOOP
                    temp_end = month_time + interval '1 week' - interval '1 day';
                    if(temp_end > end_time) then 
                        temp_end = end_time;
                    end if;
                    exe_sql = format(formatStr,record_item.id,month_time,temp_end, 
                    (random()*100)::int , (random()*100)::int,gender,age);
                    execute exe_sql;
                    month_time = month_time + interval '1 week';
                END LOOP;
            END LOOP;
        END LOOP;
	END LOOP;
END 
$$;

3.总结说明

1. 生成序列

	SELECT generate_series(1,10)

generate_series函数

函数 参数类型 返回类型 描述
generate_series(start, stop) int 或 bigint setof int 或 setof bigint(与参数类型相同) 生成一个数值序列,从start 到 stop,步进为一
generate_series(start, stop, step) int 或 bigint setof int 或 setof bigint(与参数类型相同) 生成一个数值序列,从start 到 stop,步进为step
generate_series(start, stop, step_interval) timestamp or timestamp with time zone timestamp 或 timestamp with time 生成一个数值序列,从start 到 stop,步进为step

2. 生成字符串

生成36位的UUID字符串

	SELECT uuid_generate_v4()

生成32位随机字符串

	select md5(random()::text)

生成重复字符串

	-- abc重复多次的字符串
	SELECT repeat('abc',(random()*4)::integer);
	-- 重复2次的MD5字符串
	select repeat(md5(random()::text),2);

连接符

	select concat('a', ' ', 'b');

3. 生成随机数

生成100以内的随机数字

	select (random()*100)::int

生成100以内,总位数为4,小数点后2位的小数

	SELECT (random()*100.)::numeric(4,2);

4. 生成日期

SELECT date(generate_series(now(), now() + '1 week', '1 day'));
select generate_series(to_date('20130403','yyyymmdd'), t
	o_date('20130404','yyyymmdd'), '3 hours');      
------------------------
 2013-04-03 00:00:00+08
 2013-04-03 03:00:00+08
 2013-04-03 06:00:00+08
 2013-04-03 09:00:00+08
 2013-04-03 12:00:00+08
 2013-04-03 15:00:00+08
 2013-04-03 18:00:00+08
 2013-04-03 21:00:00+08
 2013-04-04 00:00:00+08
(9 rows)

5. 自定义函数

生成随机汉字

	create or replace function gen_hanzi(int) returns text as $$    
	declare    
	  res text;    
	begin    
	  if $1 >=1 then    
	    select string_agg(chr(19968+(random()*20901)::int),'') into res from generate_series(1,$1);    
	    return res;    
	  end if;    
	  return null;    
	end;    
	$$ language plpgsql strict;   
	--输出汉字
	select gen_hanzi(10) from generate_series(1,10); 

生成随机身份证号

create or replace function gen_id(a date, b date)     
returns text as $$    
select lpad((random()*99)::int::text, 2, '0') ||     
       lpad((random()*99)::int::text, 2, '0') ||     
       lpad((random()*99)::int::text, 2, '0') ||     
       to_char(a + (random()*(b-a))::int, 'yyyymmdd') ||     
       lpad((random()*99)::int::text, 2, '0') ||     
       random()::int ||     
       (case when random()*10 >9 then 'X' else (random()*9)::int::text end ) ;    
$$ language sql strict;    

--输出身份证号
 select gen_id('1900-01-01', '2017-10-16') from generate_series(1,10);     
--------------------  
 25614020061108330X  
 49507919010403271X  
 96764619970119860X  
 915005193407306113  
 551360192005045415  
 430005192611170108  
 299138191310237806  
 95149919670723980X  
 542053198501097403  
 482334198309182411  
(10 rows)  

生成随机数组

create or replace function gen_rand_arr(int,int) returns int[] as $$    
  select array_agg((random()*$1)::int) from generate_series(1,$2);    
$$ language sql strict;    
--输出数组
select gen_rand_arr(100,10) from generate_series(1,10);             
---------------------------------  
 {69,11,12,70,7,41,81,95,83,17}  
 {26,79,20,21,64,64,51,90,38,38}  
 {3,64,46,28,26,55,39,12,69,76}  
 {66,38,87,78,8,94,18,88,89,1}  
 {6,14,81,26,36,45,90,87,35,28}  
 {25,38,91,71,67,17,26,5,29,95}  
 {82,94,32,69,72,40,63,90,29,51}  
 {91,34,66,72,60,1,17,50,88,51}  
 {77,13,89,69,84,56,86,10,61,14}  
 {5,43,8,38,11,80,78,74,70,6}  
(10 rows)  

例:


select generate_series(1,5), (random()*100)::int,
(random()*20.)::numeric(4,2), gen_hanzi(3), uuid_generate_v4()

结果:
在这里插入图片描述

原文地址:https://www.cnblogs.com/seasail/p/12179348.html