hadoop学习之路(2)

1.本地安装hadoop(不安装本地hadoop并不影响对远程环境的访问,但运行时会报错:Failed to locate the winutils binary in the hadoop binary path)

 

 2.启动hadoop环境(dfs、yarn),然后测试代码(代码中的NameNode端口8020需与linux上配置的端口一致)

package org.example;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

/**
 * Stream-based HDFS I/O examples: upload, download, and downloading a large
 * file in two pieces by seeking.
 *
 * <p>Every test opens its own {@link FileSystem} client and releases the client
 * and all streams through try-with-resources, so nothing leaks when an
 * operation fails half-way.
 */
public class HDFSIO {

    /** NameNode RPC endpoint every test connects to. */
    private static final String HDFS_URI = "hdfs://hadoop001:8020";

    /** User name the client acts as on the cluster. */
    private static final String HDFS_USER = "root";

    /**
     * Number of bytes that make up the first piece of the split download
     * (128 MiB); the second piece starts at this offset.
     */
    private static final long FIRST_CHUNK_BYTES = 128L * 1024 * 1024;

    /**
     * Uploads the local file {@code d:/zhang.txt} to {@code /zhang.txt} on HDFS.
     *
     * @throws IOException if the local file cannot be read or the HDFS write fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void putFileToHDFS() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER);
             FileInputStream fis = new FileInputStream(new File("d:/zhang.txt"));
             FSDataOutputStream fos = fs.create(new Path("/zhang.txt"))) {
            // close=false: the try-with-resources statement owns the streams.
            IOUtils.copyBytes(fis, fos, conf, false);
        }
    }

    /**
     * Downloads {@code /san.txt} from HDFS to the local file {@code d:/san.txt}.
     *
     * @throws IOException if the HDFS file cannot be read or the local write fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void getFileFromHDFS() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER);
             FSDataInputStream fis = fs.open(new Path("/san.txt"));
             FileOutputStream fos = new FileOutputStream(new File("d:/san.txt"))) {
            IOUtils.copyBytes(fis, fos, conf, false);
        }
    }

    /**
     * Downloads the first piece (at most {@link #FIRST_CHUNK_BYTES} bytes) of
     * {@code /hadoop-2.7.2.tar.gz} to {@code d:/hadoop-2.7.2.tar.gz.part1}.
     *
     * <p>Only the bytes actually read are written, so a short read or a source
     * file smaller than the chunk size does not pad the output with stale
     * buffer contents.
     *
     * @throws IOException if the HDFS file cannot be read or the local write fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void readFileSeek1() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER);
             FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"));
             FileOutputStream fos = new FileOutputStream(new File("d:/hadoop-2.7.2.tar.gz.part1"))) {
            // Bounded copy: stops after FIRST_CHUNK_BYTES bytes or at end of stream.
            IOUtils.copyBytes(fis, fos, FIRST_CHUNK_BYTES, false);
        }
    }

    /**
     * Downloads the remainder of {@code /hadoop-2.7.2.tar.gz}, starting at offset
     * {@link #FIRST_CHUNK_BYTES}, to {@code d:/hadoop-2.7.2.tar.gz.part2}.
     *
     * <p>Concatenating {@code part1} and {@code part2} yields the complete archive.
     *
     * @throws IOException if seeking or reading the HDFS file fails, or the local write fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void readFileSeek2() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER);
             FSDataInputStream fis = fs.open(new Path("/hadoop-2.7.2.tar.gz"))) {
            // Position the stream before creating the local file, so a failed
            // seek does not leave an empty part2 behind.
            fis.seek(FIRST_CHUNK_BYTES);

            try (FileOutputStream fos = new FileOutputStream(new File("d:/hadoop-2.7.2.tar.gz.part2"))) {
                IOUtils.copyBytes(fis, fos, conf, false);
            }
        }
    }
}
View Code
package org.example;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Test;

/**
 * Examples of the high-level HDFS {@link FileSystem} API: creating directories,
 * uploading, downloading, deleting, renaming and listing.
 *
 * <p>Each method opens its own client and releases it through
 * try-with-resources, so the connection is closed even when the operation
 * throws.
 */
public class HDFSClient {

    /** NameNode RPC endpoint every example connects to. */
    private static final String HDFS_URI = "hdfs://hadoop001:8020";

    /** User name the client acts as on the cluster. */
    private static final String HDFS_USER = "root";

    /**
     * Creates the directory {@code /0529/dashen/zhang} on HDFS and prints
     * {@code over} once the client has been closed.
     *
     * <p>The endpoint and user are passed explicitly to {@code FileSystem.get};
     * the alternative is to set {@code fs.defaultFS} on the configuration and
     * call {@code FileSystem.get(conf)}.
     *
     * @param args unused
     * @throws IOException if the directory cannot be created
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER)) {
            fs.mkdirs(new Path("/0529/dashen/zhang"));
        }

        System.out.println("over");
    }

    /**
     * Uploads the local file {@code d:/zhang.txt} to {@code /zhang.txt} on HDFS,
     * with {@code dfs.replication} set to 2 on the client configuration.
     *
     * @throws IOException if the upload fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void testCopyFromLocalFile() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "2");

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER)) {
            fs.copyFromLocalFile(new Path("d:/zhang.txt"), new Path("/zhang.txt"));
        }
    }

    /**
     * Downloads {@code /zhang.txt} from HDFS to the local file
     * {@code d:/zhangzhang.txt}.
     *
     * @throws IOException if the download fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void testCopyToLocalFile() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER)) {
            // delSrc=false keeps the HDFS copy; useRawLocalFileSystem=true writes
            // through the raw local file system instead of the checksummed one.
            fs.copyToLocalFile(false, new Path("/zhang.txt"), new Path("d:/zhangzhang.txt"), true);
        }
    }

    /**
     * Recursively deletes the HDFS directory {@code /0529}.
     *
     * @throws IOException if the delete fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void testDelete() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER)) {
            fs.delete(new Path("/0529"), true);
        }
    }

    /**
     * Renames {@code /zhang.txt} to {@code /zhang1.txt} on HDFS.
     *
     * @throws IOException if the rename fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void testRename() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER)) {
            fs.rename(new Path("/zhang.txt"), new Path("/zhang1.txt"));
        }
    }

    /**
     * Recursively lists every file under {@code /} and prints, per file, its
     * name, permission, length and the hosts of each block, followed by a
     * separator line.
     *
     * @throws IOException if the listing fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void testListFiles() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER)) {
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);

            while (listFiles.hasNext()) {
                LocatedFileStatus fileStatus = listFiles.next();

                System.out.println(fileStatus.getPath().getName()); // file name
                System.out.println(fileStatus.getPermission());     // file permission
                System.out.println(fileStatus.getLen());            // file length in bytes

                // One BlockLocation per block; print the hosts reported for each.
                for (BlockLocation blockLocation : fileStatus.getBlockLocations()) {
                    for (String host : blockLocation.getHosts()) {
                        System.out.println(host);
                    }
                }

                System.out.println("------test分割线--------");
            }
        }
    }

    /**
     * Lists the direct children of {@code /} and prints each name prefixed
     * with {@code f:} for a file or {@code d:} otherwise.
     *
     * @throws IOException if the listing fails
     * @throws InterruptedException if obtaining the file system client is interrupted
     * @throws URISyntaxException if the HDFS URI is malformed
     */
    @Test
    public void testListStatus() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();

        try (FileSystem fs = FileSystem.get(new URI(HDFS_URI), conf, HDFS_USER)) {
            FileStatus[] listStatus = fs.listStatus(new Path("/"));

            for (FileStatus fileStatus : listStatus) {
                String prefix = fileStatus.isFile() ? "f:" : "d:";
                System.out.println(prefix + fileStatus.getPath().getName());
            }
        }
    }
}
View Code
<?xml version="1.0" encoding="UTF-8"?>

<!-- Maven build for the hdfs01 examples: HDFS client tests built with Java 8 against Hadoop 2.7.2. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.example</groupId>
  <artifactId>hdfs01</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>hdfs01</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <!-- Pinned: the RELEASE meta-version is deprecated and makes builds non-reproducible. -->
      <version>4.13.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <!-- 2.17.1 still runs on Java 8; releases before 2.15.0 are affected by CVE-2021-44228 (Log4Shell). -->
      <version>2.17.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.2</version>
    </dependency>
<!--    <dependency>-->
<!--      <groupId>jdk.tools</groupId>-->
<!--      <artifactId>jdk.tools</artifactId>-->
<!--      <version>1.8</version>-->
<!--      <scope>system</scope>-->
<!--      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>-->
<!--    </dependency>-->
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
View Code

原文地址:https://www.cnblogs.com/shun998/p/13583248.html