第一题 数据简单清洗后,插入到hive数据库中。
将ip作为key,先通过逗号对每行进行分割,再通过split("[:]|[/]|[+]")对日期字段进行分割,然后重新排列输出。
public static class Map extends Mapper<Object , Text , Text,Text >{ private static Text ip=new Text(); // private static Text date=new Text(); // private static Text type=new Text(); // private static Text id=new Text(); private static Text traffic=new Text(); public void map(Object key,Text value,Context context) throws IOException, InterruptedException{ String line=value.toString(); String arr[]=line.split(","); traffic.set(arr[0]); String str[]=arr[1].split("[:]|[/]|[+]"); String s=str[2]+"-"+"11"+"-"+str[0]+" "+str[3]+":"+str[4]+":"+str[5]; ip.set(s+","+str[0]+","+arr[3]+","+arr[4]+","+arr[5]); context.write(traffic,ip); } } public static class Reduce extends Reducer< IntWritable, Text, Text, Text>{ public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException{ for(Text val:values){ context.write(key,val); } } }
输出的格式,应该将逗号换成“ ”
因为在将数据导入到hive数据库中出现了错误。
-- Load the part-r-00000 result file into table hive.data.
-- NOTE(review): the table's field delimiter must match the separators in that file - confirm.
load data inpath '/testhdfs1026/run/output/result/part-r-00000' into table hive.data;
-- Show every row of table data (assumes the current database is hive - confirm).
select * from data;