Spark- ERROR Shell: Failed to locate the winutils binary in the hadoop binary path java.io.IOException: Could not locate executable nullinwinutils.exe in the Hadoop binaries.

运行

mport org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Created by Lee_Rz on 2017/8/30.
 */
object SparkDemo {
  def main(args: Array[String]) {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
    val sc: SparkContext = new SparkContext(new SparkConf().setAppName(this.getClass().getName()).setMaster("local[2]"))
    val rdd1: RDD[String] = sc.textFile("C:\Users\166\Desktop\text.txt")   //一行一行的读数据   //懒算子
    val key: RDD[(String, Int)] = rdd1.flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)
    println(key.collect().toBuffer)//收集到Driver
  }
}

报错

Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
17/09/02 13:01:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/09/02 13:01:17 INFO Slf4jLogger: Slf4jLogger started
17/09/02 13:01:17 INFO Remoting: Starting remoting
17/09/02 13:01:17 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@192.168.0.166:51388]
17/09/02 13:01:18 ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable nullinwinutils.exe in the Hadoop binaries.
    at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:278)
    at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:300)
    at org.apache.hadoop.util.Shell.<clinit>(Shell.java:293)
    at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
    at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:362)
    at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$33.apply(SparkContext.scala:1015)
    at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$33.apply(SparkContext.scala:1015)
    at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:176)
    at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:176)
    at scala.Option.map(Option.scala:145)
    at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:176)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:195)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.Partitioner$.defaultPartitioner(Partitioner.scala:65)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$reduceByKey$3.apply(PairRDDFunctions.scala:331)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$reduceByKey$3.apply(PairRDDFunctions.scala:331)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.PairRDDFunctions.reduceByKey(PairRDDFunctions.scala:330)
    at zx.SparkDemo$.main(SparkDemo.scala:15)
    at zx.SparkDemo.main(SparkDemo.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
17/09/02 13:01:19 INFO FileInputFormat: Total input paths to process : 1
17/09/02 13:01:19 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
17/09/02 13:01:19 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
17/09/02 13:01:19 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
17/09/02 13:01:19 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
17/09/02 13:01:19 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
ArrayBuffer((are,2), (hello,1), (any,1), (ok,4), (world,1), (me,1), (alone,1), (you,2), (no,1), (believie,1), (more,1))
17/09/02 13:01:19 INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.

Process finished with exit code 0

检查发现hadoop下bin目录下已经存在winutils.exe,检查hadoop的path路径,发现没有严格按照格式创建hadoop的path,真确的格式是HADOOP_HOME=......,因为在hadoop的生态圈中很多框架都是依赖hadoop的,所以他们的配置文件中,默认的export的hadoop路径是格式是HADOOP_HOME

原文地址:https://www.cnblogs.com/RzCong/p/7466480.html