tpch-kudu

1.在impala里建立好文本表:

create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/customer';
create external table lineitem (L_ORDERKEY INT, L_PARTKEY INT, L_SUPPKEY INT, L_LINENUMBER INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/lineitem';
create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/nation';
create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/orders';
create external table part (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/part';
create external table partsupp (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION'/user/training/tpch_10g/partsupp';
create external table region (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_100g/region';
create external table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/user/training/tpch_10g/supplier';

3.建立kudu-impala表

create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'customer_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'C_CUSTKEY'
);
insert into customer_kudu select * from customer;

create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'nation_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'N_NATIONKEY'
);
insert into nation_kudu select * from nation;


create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'part_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'P_PARTKEY'
);
insert into part_kudu select * from part;


create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'supplier_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'S_SUPPKEY'
);
insert into supplier_kudu select * from supplier;



create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) 
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'partsupp_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
);
insert into partsupp_kudu select * from partsupp;

create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) 
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'region_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'R_REGIONKEY'
);
insert into region_kudu select * from region;


create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'orders_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'O_ORDERKEY'
);
insert into orders_kudu select * from orders;


create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
TBLPROPERTIES(
  'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
  'kudu.table_name' = 'lineitem_kudu',
  'kudu.master_addresses' = 'node1:7051',
  'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
);
insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem;

下面是日志信息:

[node2:21000] > use tpch_10g;
Query: use tpch_10g
[node2:21000] > show tables;
Query: show tables
+----------+
| name     |
+----------+
| customer |
| lineitem |
| nation   |
| orders   |
| part     |
| partsupp |
| region   |
| supplier |
+----------+
Fetched 8 row(s) in 0.01s
[node2:21000] > create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'customer_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'C_CUSTKEY'
              > );
Query: create table customer_kudu (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'customer_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'C_CUSTKEY'
)

Fetched 0 row(s) in 0.68s
[node2:21000] > insert into customer_kudu select * from customer;
Query: insert into customer_kudu select * from customer
Inserted 1500000 row(s) in 19.02s
[node2:21000] > show tables;
Query: show tables
+---------------+
| name          |
+---------------+
| customer      |
| customer_kudu |
| lineitem      |
| nation        |
| orders        |
| part          |
| partsupp      |
| region        |
| supplier      |
+---------------+
Fetched 9 row(s) in 0.01s
[node2:21000] > create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'nation_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'N_NATIONKEY'
              > );
Query: create table nation_kudu (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'nation_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'N_NATIONKEY'
)

Fetched 0 row(s) in 0.72s
[node2:21000] > insert into nation_kudu select * from nation;
Query: insert into nation_kudu select * from nation
Inserted 25 row(s) in 4.26s
[node2:21000] > create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'part_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'P_PARTKEY'
              > );
Query: create table part_kudu (P_PARTKEY INT, P_NAME STRING, P_MFGR STRING, P_BRAND STRING, P_TYPE STRING, P_SIZE INT, P_CONTAINER STRING, P_RETAILPRICE DOUBLE, P_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'part_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'P_PARTKEY'
)

Fetched 0 row(s) in 0.68s
[node2:21000] > insert into part_kudu select * from part;
Query: insert into part_kudu select * from part
Inserted 2000000 row(s) in 22.71s
[node2:21000] > create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'supplier_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'S_SUPPKEY'
              > );
Query: create table supplier_kudu (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT, S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'supplier_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'S_SUPPKEY'
)

Fetched 0 row(s) in 0.73s
[node2:21000] > insert into supplier_kudu select * from supplier_kudu;
Query: insert into supplier_kudu select * from supplier_kudu
Inserted 0 row(s) in 4.67s
[node2:21000] > create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING) 
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'partsupp_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
              > );
Query: create table partsupp_kudu (PS_PARTKEY INT, PS_SUPPKEY INT, PS_AVAILQTY INT, PS_SUPPLYCOST DOUBLE, PS_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'partsupp_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'PS_PARTKEY,PS_SUPPKEY'
)

Fetched 0 row(s) in 0.72s
[node2:21000] > insert into partsupp_kudu select * from partsupp_kudu;
Query: insert into partsupp_kudu select * from partsupp_kudu
Inserted 0 row(s) in 3.07s
[node2:21000] > insert into supplier_kudu select * from supplier;
Query: insert into supplier_kudu select * from supplier
Inserted 100000 row(s) in 1.78s
[node2:21000] > insert into partsupp_kudu select * from partsupp;
Query: insert into partsupp_kudu select * from partsupp
Inserted 8000000 row(s) in 64.44s
[node2:21000] > create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING) 
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'region_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'R_REGIONKEY'
              > );
Query: create table region_kudu (R_REGIONKEY INT, R_NAME STRING, R_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'region_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'R_REGIONKEY'
)

Fetched 0 row(s) in 0.72s
[node2:21000] > insert into region_kudu select * from region;
Query: insert into region_kudu select * from region
Inserted 5 row(s) in 4.05s


[node2:21000] > create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'lineitem_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
              > );
Query: create table lineitem_kudu (L_ORDERKEY INT, L_LINENUMBER INT, L_PARTKEY INT, L_SUPPKEY INT, L_QUANTITY DOUBLE, L_EXTENDEDPRICE DOUBLE, L_DISCOUNT DOUBLE, L_TAX DOUBLE, L_RETURNFLAG STRING, L_LINESTATUS STRING, L_SHIPDATE STRING, L_COMMITDATE STRING, L_RECEIPTDATE STRING, L_SHIPINSTRUCT STRING, L_SHIPMODE STRING, L_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'lineitem_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'L_ORDERKEY,L_LINENUMBER'
)

Fetched 0 row(s) in 0.72s
[node2:21000] > insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem;
Query: insert into lineitem_kudu select L_ORDERKEY, L_LINENUMBER, L_PARTKEY, L_SUPPKEY, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT from lineitem
Inserted 59986052 row(s) in 951.42s

[node2:21000] > create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
              > TBLPROPERTIES(
              >   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
              >   'kudu.table_name' = 'orders_kudu',
              >   'kudu.master_addresses' = 'node1:7051',
              >   'kudu.key_columns' = 'O_ORDERKEY'
              > );
Query: create table orders_kudu (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING)
TBLPROPERTIES(
'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
'kudu.table_name' = 'orders_kudu',
'kudu.master_addresses' = 'node1:7051',
'kudu.key_columns' = 'O_ORDERKEY'
)

Fetched 0 row(s) in 0.86s
[node2:21000] > insert into orders_kudu select * from orders;
Query: insert into orders_kudu select * from orders
Inserted 15000000 row(s) in 133.11s
[node2:21000] > 

3.计算表的统计信息:

compute stats customer;
compute stats lineitem;
compute stats nation;
compute stats orders;
compute stats part;
compute stats partsupp;
compute stats region;
compute stats supplier;

 

 compute stats customer_kudu;
 ompute stats lineitem_kudu;
 compute stats nation_kudu;
 compute stats orders_kudu;
 compute stats part_kudu;
 compute stats partsupp_kudu;
 compute stats region_kudu;
 compute stats supplier_kudu;

原文地址:https://www.cnblogs.com/littlesuccess/p/4867888.html