Spark 机器学习

// Read the input file as an RDD of text lines, one record per line.
val rawData = sc.textFile("/user/zhaoja/test/input/xxt.txt")

// Convert every comma-separated line into a LabeledPoint:
// all columns but the last become the feature vector, and the last column
// (minus one) becomes the label.
val data = rawData.map { record =>
  // Parse each comma-separated field of the record as a Double.
  val fields = record.split(',').map(field => field.toDouble)

  // Build a dense vector from every column except the last one.
  val features = Vectors.dense(fields.init)

  // The final column holds the class label; it is shifted down by one
  // (presumably to make 1-based labels 0-based — confirm against the data set).
  val target = fields.last - 1

  // A point that carries its class label together with its features.
  LabeledPoint(target, features)
}

 

scala> data.take(1)
res70: Array[org.apache.spark.mllib.regression.LabeledPoint] = Array((0.0,[9.0,21.0,3.0,4.0,5.0]))
原文地址:https://www.cnblogs.com/huanhuanang/p/7373680.html