package com.udemyexamples
import org.apache.spark.sql.SparkSession
object AverageFriendByAge {

  /** Parses one CSV line of the fake-friends dataset into an (age, numFriends) pair.
   *
   *  NOTE(review): assumes the line has the layout `id,name,age,numFriends`
   *  with fields 2 and 3 holding valid integers — no validation is performed,
   *  so a malformed line throws NumberFormatException / ArrayIndexOutOfBoundsException.
   *
   *  @param line one raw CSV record
   *  @return (age, number of friends)
   */
  def parseFile(line: String): (Int, Int) = {
    val fields = line.split(",")
    val age = fields(2).toInt
    val numFriends = fields(3).toInt
    // BUG FIX: the original declared `: Unit`, silently discarding this tuple,
    // which made the downstream `mapValues` call impossible to compile.
    (age, numFriends)
  }

  /** Computes and prints the average number of friends per age.
   *
   *  @param args optional: args(0) overrides the input CSV path
   *              (defaults to the original hard-coded location).
   */
  def main(args: Array[String]): Unit = {
    val inputPath =
      if (args.nonEmpty) args(0)
      else "C:\\SparkScala\\SparkScalaStudy\\fakefriend.csv"

    val spark = SparkSession.builder()
      .master("local")
      .appName("AverageFriendAge")
      .getOrCreate()

    try {
      val lines = spark.sparkContext.textFile(inputPath)

      // (age, friends) -> per-age (sumFriends, count) -> per-age average.
      val agesToFriends = lines.map(parseFile)
      val totalsByAge = agesToFriends
        .mapValues(friends => (friends, 1))
        .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
      val averagesByAge = totalsByAge.mapValues { case (sum, count) =>
        sum.toDouble / count
      }

      // Collect is safe here: the key space (distinct ages) is small.
      averagesByAge.collect().sortBy(_._1).foreach(println)
    } finally {
      // Always release the local Spark context, even on failure.
      spark.stop()
    }
  }
}
// NOTE(review): the following tutorial text was pasted after the closing brace
// and broke compilation; preserved here as a comment (translated from Chinese):
//
// You first need an instance of SparkSession; your code should look like:
//
//   val spark = SparkSession
//     .builder()
//     .appName("dataFrameExample")
//     .master("local")
//     .getOrCreate()
//   import spark.implicits._