今天来对极限测试的三个表继续做清洗
需要将表示天数的数字(day_id)改成对应的日期
我毫无想法,学习了同学的方法
// 清洗
-- Rewrite sales_sample_info_clean from sales_sample_info, turning the numeric
-- day_id offset into a calendar date; all other columns are copied unchanged.
-- The base date is 2021-08-31, so day_id = 1 maps to 2021-09-01. A valid
-- literal is used instead of '2021-09-00', which is not a real calendar date
-- and yields the intended result only under lenient date parsing.
INSERT OVERWRITE TABLE sales_sample_info_clean
SELECT
    DATE_ADD('2021-08-31', CAST(day_id AS INT)) AS day_id,
    sale_nbr AS sale_nbr,
    buy_nbr  AS buy_nbr,
    cnt      AS cnt,
    round    AS round
FROM sales_sample_info;
先将数据导入一个表,然后再重新插入到新的表中
也可以这样
清洗表中数据将数字改成日期,将数据改变后写入另一个表
-- Target table for the cleaned rows; day_id is stored as a 'yyyy-MM-dd' string.
CREATE TABLE saless (
    day_id   STRING,
    sale_nbr STRING,
    buy_nbr  STRING,
    cnt      INT,
    round    INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Rewrite saless from sales, converting the numeric day_id offset into a date
-- string (day_id = 1 -> '2021-09-01').
-- DATE_ADD on a fixed base date is used rather than epoch-second arithmetic
-- with FROM_UNIXTIME: the latter formats in the session time zone, so the
-- resulting date can shift by a day depending on where the query runs.
-- DATE_ADD needs an INT day count, hence the cast of the BIGINT day_id.
INSERT OVERWRITE TABLE saless
SELECT
    CAST(DATE_ADD('2021-08-31', CAST(day_id AS INT)) AS STRING) AS day_id,
    sale_nbr,
    buy_nbr,
    cnt,
    round
FROM sales;
简单的前期操作
创建存储所有数据的表,并导入数据
-- Staging table for the raw comma-separated rows; day_id is kept as the
-- numeric value found in the file.
CREATE TABLE sales (
    day_id   BIGINT,
    sale_nbr STRING,
    buy_nbr  STRING,
    cnt      INT,
    round    INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the CSV from the local filesystem into the staging table
-- (INTO without OVERWRITE, so rows already in the table are kept).
LOAD DATA LOCAL INPATH '/opt/software/apache-hive-2.3.9-bin/sale.csv'
INTO TABLE sales;