Principles of Hadoop's Data Computation

The example below uses the Hadoop FileSystem API to read a text file, count how often each word occurs, sort the results by count in descending order, and write them back out.


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.*;
import java.util.*;

public class WordCount {
    public static void main(String[] args) throws IOException {
        Map<String, Integer> map = new HashMap<>();
        Configuration conf = new Configuration();
        // Get a file system handle (the local file system with a default Configuration)
        FileSystem fileSystem = FileSystem.get(conf);

        // Read the input data; backslashes in Windows paths must be escaped in Java strings
        FSDataInputStream open = fileSystem.open(new Path("E:\\wc.txt"));
        BufferedReader reader = new BufferedReader(new InputStreamReader(open));

        // Process the data line by line
        String line;
        while ((line = reader.readLine()) != null) {
            String[] words = line.split(" ");
            for (String word : words) {
                // getOrDefault returns the existing count for this key,
                // or the default value 0 if the key is not yet in the map
                Integer count = map.getOrDefault(word, 0);
                count++;
                map.put(word, count);
            }
        }

        // Write the results
        FSDataOutputStream create = fileSystem.create(new Path("E:\\result.txt"));
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(create));

        // Sort the map entries by count, descending
        Set<Map.Entry<String, Integer>> entries = map.entrySet();
        ArrayList<Map.Entry<String, Integer>> list = new ArrayList<>(entries);
        list.sort(new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                return o2.getValue() - o1.getValue();
            }
        });
        for (Map.Entry<String, Integer> entry : list) {
            writer.write(entry.getKey() + "=" + entry.getValue() + " ");
        }
        writer.flush();

        // Close the streams
        reader.close();
        writer.close();
    }
}
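For illustration: if wc.txt contains the two lines "hadoop spark hadoop" and "hive hadoop", result.txt ends up with the single line "hadoop=3 spark=1 hive=1" (entries separated by spaces, highest count first; the order of tied counts depends on the HashMap and may vary).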

Prerequisite: the input file E:\wc.txt must exist and contain data.
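If you need test data, a throwaway snippet like the following can create the input file (the words are arbitrary sample data; any space-separated text works):

import java.io.FileWriter;
import java.io.IOException;

public class MakeInput {
    public static void main(String[] args) throws IOException {
        // Arbitrary sample words, space-separated, as the word counter expects
        try (FileWriter fw = new FileWriter("E:\\wc.txt")) {
            fw.write("hadoop spark hadoop\n");
            fw.write("hive hadoop\n");
        }
    }
}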

Prerequisite: a working Hadoop environment on Windows.
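On Windows this usually means HADOOP_HOME points at a directory whose bin folder contains winutils.exe. If the environment variable is not set system-wide, a common workaround is to set the hadoop.home.dir property at the very start of main(); the install path below is an assumption, adjust it to your machine:

// Assumed install location; its bin folder must contain winutils.exe
System.setProperty("hadoop.home.dir", "C:\\hadoop");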

Prerequisite: the output file (E:\result.txt in the code above) must not already exist.
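The program above does all of its counting in a single JVM, which is fine for a demo but is not how Hadoop computes at scale. On a cluster, the same word count is conventionally written as a MapReduce job: the map phase emits (word, 1) pairs and the reduce phase sums them per word. A minimal sketch follows; it is not from the original post, and the class names and argument handling are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountJob {
    // Map phase: emit (word, 1) for every word in every input line
    public static class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String w : value.toString().split(" ")) {
                word.set(w);
                context.write(word, ONE);
            }
        }
    }

    // Reduce phase: sum the 1s emitted for each distinct word
    public static class WcReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountJob.class);
        job.setMapperClass(WcMapper.class);
        job.setReducerClass(WcReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Run it with the input and output paths as arguments; as with the example above, the output location must not already exist.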

Original article: https://www.cnblogs.com/wangshuang123/p/10914007.html