Spark SQL: createOrReplaceTempView vs. registerTempTable

createOrReplaceTempView is the API for Spark 2.x and later.
registerTempTable is the older API from Spark 1.5.x/1.6.x; it is deprecated in 2.x in favor of createOrReplaceTempView.
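
The call pattern is the same; only the method name changes. A minimal sketch (the view name "people" is just an illustration):

  // Spark 1.5.x / 1.6.x (SQLContext / HiveContext era)
  df.registerTempTable("people")

  // Spark 2.x and later: registerTempTable is deprecated in favor of
  df.createOrReplaceTempView("people")

  // Either way, the registered view can then be queried with SQL:
  sqlContext.sql("select * from people where age > 18")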
  val data1 = dataSelect1(sqlContext, sparkModel)
  val data2 = dataSelect2(sqlContext, sparkModel)
  data1.createOrReplaceTempView("new_table1_info")
  data2.createOrReplaceTempView("new_table2_info")
 
  val sql = "select t1.*, t2.* from new_table1_info t1 left join new_table2_info t2 on t1.name = t2.name2 where t1.age > 18"
  val result = sqlContext.sql(sql)
 
  def dataSelect1(sqlContext: HiveContext, sm: SparkModel): DataFrame = {
    val sql = "select name,age,other  from table1 where name !='' "
    sqlContext.sql(sql)
  }
  
 
  def dataSelect2(sqlContext: HiveContext, sm: SparkModel): DataFrame = {
    val sql = "select name2,age2,other2,hh  from table2 where name !='' "
    sqlContext.sql(sql)
  } 
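
The snippets above assume sqlContext is a HiveContext; SparkModel is the author's own helper class and is not defined in the post. A minimal sketch of the setup these helpers rely on, for both the 1.x and 2.x APIs (names like "demo" are just placeholders):

  // Spark 1.x style, matching the HiveContext parameter used above
  val conf = new org.apache.spark.SparkConf().setAppName("demo")
  val sc = new org.apache.spark.SparkContext(conf)
  val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)

  // Spark 2.x style: SparkSession replaces HiveContext
  val spark = org.apache.spark.sql.SparkSession.builder()
    .appName("demo")
    .enableHiveSupport()
    .getOrCreate()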

If one batch produces too much data, sending it to Kafka all at once can cause Kafka to back up, so sleep a few milliseconds between messages. Tune the interval against your actual data volume; roughly one million messages per minute is a reasonable target.

result.foreach(x => {
      // x is a Row; turn it into a field-name -> value map and then into JSON
      // (assumes org.json.JSONObject and scala.collection.JavaConverters._ are imported)
      val json = new JSONObject(x.getValuesMap[Any](x.schema.fieldNames).asJava)
      // kafka is a broadcast variable wrapping a producer (see the sketch below)
      kafka.value.send(topic, json.toString)
      // throttle: sleep a few milliseconds per message so Kafka is not flooded
      Thread.sleep(everynum.toInt)
    })
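
Note that kafka, topic, and everynum are not defined in the post; kafka appears to be a broadcast variable wrapping a Kafka producer. A sketch of what such a serializable wrapper might look like (the class and variable names here are hypothetical, not from the original):

  import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

  // Serializable wrapper so a lazily created producer can be broadcast to executors
  class KafkaSink(createProducer: () => KafkaProducer[String, String]) extends Serializable {
    lazy val producer = createProducer()
    def send(topic: String, value: String): Unit =
      producer.send(new ProducerRecord[String, String](topic, value))
  }

  object KafkaSink {
    def apply(config: java.util.Properties): KafkaSink =
      new KafkaSink(() => new KafkaProducer[String, String](config))
  }

  // On the driver (kafkaProps holds the usual bootstrap.servers / serializer settings):
  // val kafka = sc.broadcast(KafkaSink(kafkaProps))
  // val topic = "result_topic"   // hypothetical topic name
  // val everynum = "5"           // sleep interval in milliseconds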
Original article: https://www.cnblogs.com/shaozhiqi/p/11599091.html