Principles of Hadoop's Data Computation

The example below uses the Hadoop FileSystem API to read a text file, count how often each word occurs, sort the results by count in descending order, and write them back out.


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.*;
import java.util.*;

public class WordCount {
    public static void main(String[] args) throws IOException {
        Map<String, Integer> map = new HashMap<>();
        Configuration conf = new Configuration();
        // Get a file system handle (the local file system with a default Configuration)
        FileSystem fileSystem = FileSystem.get(conf);

        // Read the input data; backslashes in Windows paths must be escaped in Java strings
        FSDataInputStream open = fileSystem.open(new Path("E:\\wc.txt"));
        BufferedReader reader = new BufferedReader(new InputStreamReader(open));

        // Process the data line by line
        String line;
        while ((line = reader.readLine()) != null) {
            String[] words = line.split(" ");
            for (String word : words) {
                // getOrDefault returns the existing count for this key,
                // or the default value 0 if the key is not yet in the map
                Integer count = map.getOrDefault(word, 0);
                count++;
                map.put(word, count);
            }
        }

        // Write the results
        FSDataOutputStream create = fileSystem.create(new Path("E:\\result.txt"));
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(create));

        // Sort the map entries by count, descending
        Set<Map.Entry<String, Integer>> entries = map.entrySet();
        ArrayList<Map.Entry<String, Integer>> list = new ArrayList<>(entries);
        list.sort(new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                return o2.getValue() - o1.getValue();
            }
        });
        for (Map.Entry<String, Integer> entry : list) {
            writer.write(entry.getKey() + "=" + entry.getValue() + " ");
        }
        writer.flush();

        // Close the streams
        reader.close();
        writer.close();
    }
}
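For illustration: if wc.txt contains the two lines "hadoop spark hadoop" and "hive hadoop", result.txt ends up with the single line "hadoop=3 spark=1 hive=1" (entries separated by spaces, highest count first; the order of tied counts depends on the HashMap and may vary).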

Prerequisite: the input file E:\wc.txt must exist and contain data.
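If you need test data, a throwaway snippet like the following can create the input file (the words are arbitrary sample data; any space-separated text works):

import java.io.FileWriter;
import java.io.IOException;

public class MakeInput {
    public static void main(String[] args) throws IOException {
        // Arbitrary sample words, space-separated, as the word counter expects
        try (FileWriter fw = new FileWriter("E:\\wc.txt")) {
            fw.write("hadoop spark hadoop\n");
            fw.write("hive hadoop\n");
        }
    }
}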

Prerequisite: a working Hadoop environment on Windows.
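On Windows this usually means HADOOP_HOME points at a directory whose bin folder contains winutils.exe. If the environment variable is not set system-wide, a common workaround is to set the hadoop.home.dir property at the very start of main(); the install path below is an assumption, adjust it to your machine:

// Assumed install location; its bin folder must contain winutils.exe
System.setProperty("hadoop.home.dir", "C:\\hadoop");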

Prerequisite: the output file (E:\result.txt in the code above) must not already exist.
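The program above does all of its counting in a single JVM, which is fine for a demo but is not how Hadoop computes at scale. On a cluster, the same word count is conventionally written as a MapReduce job: the map phase emits (word, 1) pairs and the reduce phase sums them per word. A minimal sketch follows; it is not from the original post, and the class names and argument handling are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountJob {
    // Map phase: emit (word, 1) for every word in every input line
    public static class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String w : value.toString().split(" ")) {
                word.set(w);
                context.write(word, ONE);
            }
        }
    }

    // Reduce phase: sum the 1s emitted for each distinct word
    public static class WcReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountJob.class);
        job.setMapperClass(WcMapper.class);
        job.setReducerClass(WcReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Run it with the input and output paths as arguments; as with the example above, the output location must not already exist.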

Original article: https://www.cnblogs.com/wangshuang123/p/10914007.html